diff --git a/paddle/fluid/distributed/ps/service/communicator/communicator.h b/paddle/fluid/distributed/ps/service/communicator/communicator.h
index da4e2f1a12898..9f8c998d3a1c2 100644
--- a/paddle/fluid/distributed/ps/service/communicator/communicator.h
+++ b/paddle/fluid/distributed/ps/service/communicator/communicator.h
@@ -35,12 +35,12 @@ limitations under the License. */
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/framework/variable_helper.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/split.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 #include "paddle/fluid/distributed/ps/service/ps_client.h"
 
@@ -180,7 +180,7 @@ inline void MergeVars(const std::string &var_name,
     // set output tensor to 0.
     paddle::platform::CPUDeviceContext cpu_ctx;
-    paddle::operators::math::SetConstant<paddle::platform::CPUDeviceContext, T>
+    pten::funcs::SetConstant<paddle::platform::CPUDeviceContext, T>
         constant_functor;
     constant_functor(cpu_ctx, out_t, static_cast<T>(0));
     // sum all vars to out
diff --git a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h
index 71b44f36d0107..5bbcdca88a1ce 100644
--- a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h
+++ b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h
@@ -38,9 +38,10 @@
 #include "paddle/fluid/distributed/ps/service/ps_service/service.h"
 #include "paddle/fluid/distributed/ps/service/sendrecv.pb.h"
 #include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
+
 namespace paddle {
 namespace distributed {
 class GraphPyService {
diff --git a/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc b/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
index d7d9d1ed1bafd..dd79d67be752e 100644
--- a/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
+++ b/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
@@ -21,8 +21,8 @@ limitations under the License. */
 #include "paddle/fluid/distributed/ps/service/brpc_ps_server.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/place.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 namespace paddle {
 namespace distributed {
@@ -42,7 +42,6 @@ class DenseTensor;
 namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 namespace operators = paddle::operators;
-namespace math = paddle::operators::math;
 namespace memory = paddle::memory;
 namespace distributed = paddle::distributed;
diff --git a/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc b/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
index 4f7b608c8bfb9..0dfaafb258121 100644
--- a/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
+++ b/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
@@ -22,8 +22,8 @@ limitations under the License. */
 #include "paddle/fluid/distributed/ps/service/brpc_ps_server.h"
 #include "paddle/fluid/distributed/ps/service/env.h"
 #include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/place.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 namespace paddle {
 namespace distributed {
@@ -43,7 +43,6 @@ class DenseTensor;
 namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 namespace operators = paddle::operators;
-namespace math = paddle::operators::math;
 namespace memory = paddle::memory;
 namespace distributed = paddle::distributed;
diff --git a/paddle/fluid/distributed/test/brpc_utils_test.cc b/paddle/fluid/distributed/test/brpc_utils_test.cc
index 608f647d148e4..7f18c86ac7e06 100644
--- a/paddle/fluid/distributed/test/brpc_utils_test.cc
+++ b/paddle/fluid/distributed/test/brpc_utils_test.cc
@@ -17,7 +17,7 @@ limitations under the License. */
 #include "gtest/gtest.h"
 
 #include "paddle/fluid/distributed/ps/service/brpc_utils.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 namespace paddle {
 namespace framework {
@@ -28,7 +28,6 @@ class Variable;
 namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 namespace operators = paddle::operators;
-namespace math = paddle::operators::math;
 namespace memory = paddle::memory;
 namespace distributed = paddle::distributed;
 
@@ -42,7 +41,7 @@ void CreateVarsOnScope(framework::Scope* scope, platform::Place* place,
   lod1.push_back(framework::Vector<size_t>({1, 3, 8}));
   tensor1->set_lod(lod1);
   tensor1->mutable_data<float>(*place);
-  math::set_constant(ctx, tensor1, 31.9);
+  pten::funcs::set_constant(ctx, tensor1, 31.9);
 
   // var 2
   framework::Variable* var2 = scope->Var("x2");
@@ -52,7 +51,7 @@ void CreateVarsOnScope(framework::Scope* scope, platform::Place* place,
   lod2.push_back(framework::Vector<size_t>({1, 1}));
   tensor2->set_lod(lod2);
   tensor2->mutable_data<int>(*place);
-  math::set_constant(ctx, tensor2, 100);
+  pten::funcs::set_constant(ctx, tensor2, 100);
 
   // var 3
   framework::Variable* var3 = scope->Var("x3");
@@ -62,7 +61,7 @@ void CreateVarsOnScope(framework::Scope* scope, platform::Place* place,
   auto* rows = slr->mutable_rows();
   tensor3->Resize(framework::make_ddim({564, 128}));
   tensor3->mutable_data<float>(*place);
-  math::set_constant(ctx, tensor3, 32.7);
+  pten::funcs::set_constant(ctx, tensor3, 32.7);
   for (int i = 0; i < 564; ++i) rows->push_back(i);
 }
diff --git a/paddle/fluid/distributed/test/graph_node_split_test.cc b/paddle/fluid/distributed/test/graph_node_split_test.cc
index e808d2a81539a..6bbcb1d399657 100644
--- a/paddle/fluid/distributed/test/graph_node_split_test.cc
+++ b/paddle/fluid/distributed/test/graph_node_split_test.cc
@@ -36,14 +36,13 @@ limitations under the License. */
*/ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace framework = paddle::framework; namespace platform = paddle::platform; namespace operators = paddle::operators; -namespace math = paddle::operators::math; namespace memory = paddle::memory; namespace distributed = paddle::distributed; diff --git a/paddle/fluid/distributed/test/graph_node_test.cc b/paddle/fluid/distributed/test/graph_node_test.cc index 3243ebc389c85..4aa2839c181e9 100644 --- a/paddle/fluid/distributed/test/graph_node_test.cc +++ b/paddle/fluid/distributed/test/graph_node_test.cc @@ -36,14 +36,13 @@ limitations under the License. */ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace framework = paddle::framework; namespace platform = paddle::platform; namespace operators = paddle::operators; -namespace math = paddle::operators::math; namespace memory = paddle::memory; namespace distributed = paddle::distributed; diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index 90ae91db5f5f9..8bfeaf47b23c3 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace egr { diff --git a/paddle/fluid/framework/data_device_transform_test.cu b/paddle/fluid/framework/data_device_transform_test.cu index b364cf9b31d56..316f8c4d90dc8 100644 --- a/paddle/fluid/framework/data_device_transform_test.cu +++ b/paddle/fluid/framework/data_device_transform_test.cu @@ -19,9 +19,9 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/init.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/framework/pten_utils.h" diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index 688835cc3c93b..a014d34bcf5f0 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -14,7 +14,7 @@ #include "paddle/fluid/framework/data_layout_transform.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_reuse.h" #endif @@ -42,7 +42,7 @@ void CastDataLayout::apply() { auto place = ctx_->GetPlace(); if (platform::is_cpu_place(place)) { - operators::math::Transpose trans4; + pten::funcs::Transpose trans4; auto* context = static_cast(ctx_); trans4(*context, in_, out_, axis_); } else { diff --git a/paddle/fluid/framework/data_transform.h b/paddle/fluid/framework/data_transform.h index 385a5ff704f51..5c5d49f8fec77 100644 --- a/paddle/fluid/framework/data_transform.h +++ b/paddle/fluid/framework/data_transform.h @@ -22,10 +22,10 @@ limitations under the License. */ #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/transform.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc index 5dbc3e38ea135..cab7d5ddb8b5f 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc @@ -33,7 +33,7 @@ #include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_DGC) #include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h" diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index 9d37792653664..4c91ece049301 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -28,8 +28,8 @@ #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/op_base.h" #include "paddle/fluid/imperative/tracer.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/pten/kernels/funcs/math_function.h" DECLARE_bool(sort_sum_gradient); @@ -103,7 +103,7 @@ void BasicEngine::Init( if (grad_tensor == nullptr) { grad_var->Resize(fwd_var.dims()); grad_var->mutable_data(fwd_var.place(), fwd_var.type()); - operators::math::set_constant(*dev_ctx, grad_var, 1.0); + pten::funcs::set_constant(*dev_ctx, grad_var, 1.0); } else { paddle::framework::TensorCopy( 
             grad_tensor->Var().Get<framework::LoDTensor>(), fwd_var.place(),
@@ -156,7 +156,7 @@ void BasicEngine::CheckBackwardInputs(const OpBase& op) {
           VLOG(6) << "Set ungenerated Grad: " << var->Name()
                   << " as zero with dtype "
                   << framework::DataTypeToString(var->ForwardDataType());
-          operators::math::set_constant(*dev_ctx, tensor, 0.0);
+          pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
         }
       }
     }
diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc
index 75d4d8246e3c3..5eed7eca7a751 100644
--- a/paddle/fluid/imperative/gradient_accumulator.cc
+++ b/paddle/fluid/imperative/gradient_accumulator.cc
@@ -22,12 +22,12 @@
 #include "paddle/fluid/framework/selected_rows_utils.h"
 #include "paddle/fluid/imperative/layer.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/fluid/platform/profiler.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #ifdef PADDLE_WITH_XPU
 #include "xpu/refactor/math.h"
 #endif
@@ -210,7 +210,7 @@ void TensorAddImpl(const framework::Tensor& src, framework::Tensor* dst,
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   paddle::platform::DeviceContext* ctx = pool.Get(place);
   auto dev_ctx = dynamic_cast<DeviceContext*>(ctx);
-  operators::math::ElementwiseAddTo<DeviceContext, T> func;
+  pten::funcs::ElementwiseAddTo<DeviceContext, T> func;
   func(dev_ctx, src, dst);
 }
 
@@ -703,12 +703,12 @@ void EagerGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
                 << var->Var().Get<framework::LoDTensor>().dims();
         tensor->Resize(var->Var().Get<framework::LoDTensor>().dims());
         tensor->mutable_data(place, var->DataType());
-        operators::math::set_constant(*dev_ctx, tensor, 0.0);
+        pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
       } else {
         auto* tensor =
             dst_var->MutableVar()->GetMutable<framework::LoDTensor>();
         tensor->mutable_data(place, var->DataType());
-        operators::math::set_constant(*dev_ctx, tensor, 0.0);
+        pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
       }
     }
   }
@@ -835,12 +835,12 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
                 << var->Var().Get<framework::LoDTensor>().dims();
         tensor->Resize(var->Var().Get<framework::LoDTensor>().dims());
         tensor->mutable_data(place, var->DataType());
-        operators::math::set_constant(*dev_ctx, tensor, 0.0);
+        pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
       } else {
         auto* tensor =
             dst_var->MutableVar()->GetMutable<framework::LoDTensor>();
         tensor->mutable_data(place, var->DataType());
-        operators::math::set_constant(*dev_ctx, tensor, 0.0);
+        pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
       }
     }
   }
   // looks like tmp_grad_vars will not have any member but just in case
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 5b8974b33485e..60e1291a08700 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -20,10 +20,10 @@
 #include "paddle/fluid/imperative/op_base.h"
 #include "paddle/fluid/imperative/prepared_operator.h"
 #include "paddle/fluid/imperative/var_helper.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/profiler.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
@@ -229,7 +229,7 @@ void VarBase::ClearGradient(bool set_to_zero) {
       if (set_to_zero) {
         auto* dev_ctx =
             platform::DeviceContextPool::Instance().Get(grad_t->place());
-        operators::math::set_constant(*dev_ctx, grad_t, 0.0);
+        pten::funcs::set_constant(*dev_ctx, grad_t, 0.0);
       } else {
         grad_t->clear();
       }
diff --git a/paddle/fluid/imperative/partial_grad_engine.cc b/paddle/fluid/imperative/partial_grad_engine.cc
index 45756083c9047..ed60a4dc0849b 100644
--- a/paddle/fluid/imperative/partial_grad_engine.cc
+++ b/paddle/fluid/imperative/partial_grad_engine.cc
@@ -28,10 +28,10 @@
 #include "paddle/fluid/imperative/layer.h"
 #include "paddle/fluid/imperative/op_base.h"
 #include "paddle/fluid/imperative/tracer.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/fluid/string/string_helper.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 DECLARE_bool(sort_sum_gradient);
 
@@ -316,7 +316,7 @@ static void FillConstantLike(const VariableWrapper &ref_var,
   } else {
     dst_tensor->mutable_data(place, ref_var.DataType());
   }
-  operators::math::set_constant(*dev_ctx, dst_tensor, value);
+  pten::funcs::set_constant(*dev_ctx, dst_tensor, value);
 }
 
 /**
diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc
index 54e27b2bd8c31..361b9eb0fe64f 100644
--- a/paddle/fluid/imperative/reducer.cc
+++ b/paddle/fluid/imperative/reducer.cc
@@ -755,7 +755,7 @@ void Reducer::MarkVarReady(const size_t var_index, const bool is_used_var) {
             {static_cast<int64_t>(length)});
       } else {
         group_tensor.Resize({static_cast<int64_t>(length)});
-        operators::math::set_constant(*dev_ctx, &group_tensor, 0.0);
+        pten::funcs::set_constant(*dev_ctx, &group_tensor, 0.0);
       }
 #endif
     }
diff --git a/paddle/fluid/imperative/reducer.h b/paddle/fluid/imperative/reducer.h
index b99d7adc0c70a..b0317fe33e207 100644
--- a/paddle/fluid/imperative/reducer.h
+++ b/paddle/fluid/imperative/reducer.h
@@ -29,8 +29,8 @@
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/framework/variable.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 namespace paddle {
 namespace imperative {
diff --git a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc
index 6210cb108bd79..e91b0b0a7770e 100644
--- a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc
+++ b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc
@@ -20,7 +20,7 @@
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/imperative/gradient_accumulator.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 namespace imperative = paddle::imperative;
 namespace platform = paddle::platform;
diff --git a/paddle/fluid/operators/addmm_op.h b/paddle/fluid/operators/addmm_op.h
index ecfd10d2fa6fb..8fe73d81b0272 100644
--- a/paddle/fluid/operators/addmm_op.h
+++ b/paddle/fluid/operators/addmm_op.h
@@ -20,7 +20,7 @@ limitations under the License. */
*/ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace ops = paddle::operators; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/affine_grid_op.cu b/paddle/fluid/operators/affine_grid_op.cu index bcf7deefc98f0..d203dcb7b913c 100644 --- a/paddle/fluid/operators/affine_grid_op.cu +++ b/paddle/fluid/operators/affine_grid_op.cu @@ -170,7 +170,7 @@ class AffineGridGradOpCUDAKernel : public framework::OpKernel { w = size_attr[3]; } T* theta_grad_data = theta_grad->mutable_data({n, 2, 3}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.cuda_device_context(), theta_grad, static_cast(0)); T h_step; diff --git a/paddle/fluid/operators/affine_grid_op.h b/paddle/fluid/operators/affine_grid_op.h index 50c9ebcd9c8f5..129c7a61a7876 100644 --- a/paddle/fluid/operators/affine_grid_op.h +++ b/paddle/fluid/operators/affine_grid_op.h @@ -17,7 +17,7 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -61,7 +61,7 @@ inline void GetIdxMap(int n, int h, int w, bool align_corners, Tensor* grid, Tensor ones; ones.mutable_data({h, w, 1}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), &ones, static_cast(1)); auto ones_t = EigenTensor::From(ones); // Get grid tensor with shape [n, h, w, 3] by concatenating h_idx, w_idx and @@ -115,7 +115,7 @@ class AffineGridOpKernel : public framework::OpKernel { } auto* output = ctx.Output("Output"); output->mutable_data({n, h, w, 2}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), output, static_cast(0)); Tensor grid; @@ -158,7 +158,7 @@ class AffineGridGradOpKernel : public framework::OpKernel { w = size_attr[3]; } theta_grad->mutable_data({n, 2, 3}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), theta_grad, static_cast(0)); Tensor grid; diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc index a80b83f0cbe51..6390a1f4738d9 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc @@ -24,12 +24,11 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; using Tensor = paddle::framework::Tensor; diff --git a/paddle/fluid/operators/assign_op_npu_test.cc b/paddle/fluid/operators/assign_op_npu_test.cc index 049cfb8046f80..4761ec6155666 100644 --- a/paddle/fluid/operators/assign_op_npu_test.cc +++ b/paddle/fluid/operators/assign_op_npu_test.cc @@ -24,12 +24,11 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(assign); USE_OP_DEVICE_KERNEL(assign, NPU); diff --git a/paddle/fluid/operators/average_accumulates_op.h b/paddle/fluid/operators/average_accumulates_op.h index 6813f56675826..3cd235d89a327 100644 --- a/paddle/fluid/operators/average_accumulates_op.h +++ b/paddle/fluid/operators/average_accumulates_op.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -78,7 +78,7 @@ class AverageAccumulatesKernel : public framework::OpKernel { // Compute auto& place = *ctx.template device_context().eigen_device(); - math::SetConstant constant_functor; + pten::funcs::SetConstant constant_functor; ++num_updates; ++num_accumulates; out_sum_1_tensor.device(place) = in_sum_1_tensor + param_tensor; diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 0a8e753c01dc0..8e960ff89bf51 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -989,7 +989,7 @@ class BatchNormDoubleGradKernel (data_layout == DataLayout::kNCHW ? x_dims[1] : x_dims[x_dims.size() - 1]); const int sample_size = X->numel() / C; - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; const T *mean_data = Saved_mean->data(); const T *inv_var_data = Saved_variance->data(); diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu index 5f32d697bae40..85bd8451b8d70 100644 --- a/paddle/fluid/operators/batch_norm_op.cu +++ b/paddle/fluid/operators/batch_norm_op.cu @@ -25,9 +25,9 @@ namespace cub = hipcub; #endif #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/batch_norm_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/norm_utils.cu.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" DECLARE_bool(cudnn_batchnorm_spatial_persistent); @@ -967,7 +967,8 @@ class BatchNormGradKernel if (d_x) { framework::TensorCopy(*d_y, ctx.GetPlace(), d_x); } - math::SetConstant> + pten::funcs::SetConstant> functor; functor(dev_ctx, d_scale, static_cast>(0)); functor(dev_ctx, d_bias, static_cast>(0)); diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h index 32e956e15282a..55f1964cf5c55 100644 --- a/paddle/fluid/operators/batch_norm_op.h +++ b/paddle/fluid/operators/batch_norm_op.h @@ -20,8 +20,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/layout_utils.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/norm_utils.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/batch_size_like.h b/paddle/fluid/operators/batch_size_like.h index f24a3c316a05a..1ee0e7002aba3 100644 --- a/paddle/fluid/operators/batch_size_like.h +++ b/paddle/fluid/operators/batch_size_like.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/bilinear_tensor_product_op.h b/paddle/fluid/operators/bilinear_tensor_product_op.h index 8f6c9b60dcad5..c7eb70c290e17 100644 --- a/paddle/fluid/operators/bilinear_tensor_product_op.h +++ b/paddle/fluid/operators/bilinear_tensor_product_op.h @@ -111,7 +111,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { ctx.GetPlace()); auto y_scale_mat = EigenMatrix::From(y_scale); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; if (d_x) { d_x->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/bincount_op.cu b/paddle/fluid/operators/bincount_op.cu index 5964b9e345e93..dd7804625a77c 100644 --- a/paddle/fluid/operators/bincount_op.cu +++ b/paddle/fluid/operators/bincount_op.cu @@ -105,7 +105,7 @@ void BincountCUDAInner(const framework::ExecutionContext& context) { if (!has_weights) { int64_t* output_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, 0L); KernelBincount<<mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, static_cast(0)); @@ -125,7 +125,7 @@ void BincountCUDAInner(const framework::ExecutionContext& context) { input_data, input_numel, has_weights, weights_data, output_data); } else { double* output_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, static_cast(0)); diff --git a/paddle/fluid/operators/bincount_op.h b/paddle/fluid/operators/bincount_op.h index a142332bce266..3f4334099e277 100644 --- a/paddle/fluid/operators/bincount_op.h +++ b/paddle/fluid/operators/bincount_op.h @@ -18,7 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -64,7 +64,7 @@ void BincountInner(const framework::ExecutionContext& context) { const auto& weights_type = weights->type(); if (weights_type == framework::proto::VarType::FP32) { float* output_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, static_cast(0)); for (int64_t i = 0; i < input_numel; i++) { @@ -72,7 +72,7 @@ void BincountInner(const framework::ExecutionContext& context) { } } else { double* output_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, static_cast(0)); for (int64_t i = 0; i < input_numel; i++) { @@ -82,7 +82,7 @@ void BincountInner(const framework::ExecutionContext& context) { } else { int64_t* output_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, 0L); for (int64_t i = 0; i < input_numel; i++) { output_data[input_data[i]] += 1L; diff --git a/paddle/fluid/operators/bmm_op.h b/paddle/fluid/operators/bmm_op.h index 15cd6de91365e..7a0ddd4582341 100644 --- a/paddle/fluid/operators/bmm_op.h +++ b/paddle/fluid/operators/bmm_op.h @@ -21,7 +21,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/bpr_loss_op.h b/paddle/fluid/operators/bpr_loss_op.h index bebaf6e3365c0..559d3e14edd49 100644 --- a/paddle/fluid/operators/bpr_loss_op.h +++ b/paddle/fluid/operators/bpr_loss_op.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/broadcast_tensors_op.h b/paddle/fluid/operators/broadcast_tensors_op.h index 0eeb9234df0fe..4161b5879f698 100644 --- a/paddle/fluid/operators/broadcast_tensors_op.h +++ b/paddle/fluid/operators/broadcast_tensors_op.h @@ -19,7 +19,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/eigen/eigen_function.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define SWITCH_OUT_RANK_CASE(n) \ case n: { \ diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc index 5655fd25ec24b..d71d6fc39b119 100644 --- a/paddle/fluid/operators/coalesce_tensor_op.cc +++ b/paddle/fluid/operators/coalesce_tensor_op.cc @@ -18,8 +18,8 @@ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_memory_aligment.h" +#include "paddle/pten/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/platform/device/npu/npu_op_runner.h" #endif @@ -65,11 +65,11 @@ struct FillConstantVisitor { .stream(); runner.Run(stream); } else { - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; set_constant(dev_ctx_, tensor_, static_cast(value_)); } #else - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; set_constant(dev_ctx_, tensor_, static_cast(value_)); #endif } diff --git a/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc b/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc index ecf682aa52432..a51e81a4279d4 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc @@ -27,8 +27,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/c_allgather_op.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" @@ -43,7 +43,6 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(c_allgather); USE_NO_KERNEL_OP(c_gen_hccl_id); diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc b/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc index fa134b60e28de..f273e31f6b00f 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc @@ -27,8 +27,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/c_allgather_op.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" @@ -43,7 +43,6 @@ limitations under the License. 
*/ namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(c_allreduce_max); USE_NO_KERNEL_OP(c_gen_hccl_id); diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc index 3e91220423e6a..66efcd2a49072 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc @@ -27,8 +27,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" @@ -45,7 +45,6 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(c_allreduce_sum); USE_NO_KERNEL_OP(c_gen_hccl_id); diff --git a/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc b/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc index 1ea34c8200333..acfdd42a41fd2 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc @@ -27,8 +27,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/c_broadcast_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" @@ -40,7 +40,6 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(c_broadcast); USE_NO_KERNEL_OP(c_gen_hccl_id); diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc b/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc index d589d0a25e694..ee0463f84b126 100644 --- a/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc @@ -27,8 +27,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/c_reduce_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" @@ -40,7 +40,6 @@ limitations under the License. 
*/ namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(c_reduce_sum); USE_NO_KERNEL_OP(c_gen_hccl_id); diff --git a/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc b/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc index db78652f87980..652bf0c1f2a86 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc @@ -27,8 +27,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/c_allgather_op.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" @@ -43,7 +43,6 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(c_reducescatter); USE_NO_KERNEL_OP(c_gen_hccl_id); diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc index 5778a270f1992..9d27d99b3ab35 100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc @@ -26,12 +26,11 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(elementwise_add); USE_OP_DEVICE_KERNEL(elementwise_add, NPU); diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc index e701783568694..9d8837864784f 100644 --- a/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc @@ -27,8 +27,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/c_broadcast_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" @@ -40,7 +40,6 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(c_broadcast); USE_OP_DEVICE_KERNEL(c_sync_comm_stream, NPU); diff --git a/paddle/fluid/operators/collective/checknumeric_npu_test.cc b/paddle/fluid/operators/collective/checknumeric_npu_test.cc index 2be37cc456b97..18b75d8e68575 100644 --- a/paddle/fluid/operators/collective/checknumeric_npu_test.cc +++ b/paddle/fluid/operators/collective/checknumeric_npu_test.cc @@ -28,8 +28,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" @@ -41,7 +41,6 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(c_allreduce_sum); USE_OP_DEVICE_KERNEL(c_allreduce_sum, NPU); diff --git a/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc b/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc index edd4b18b35a6d..bf96f48bc8795 100644 --- a/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc +++ b/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc @@ -27,8 +27,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" #include "paddle/fluid/operators/collective/recv_v2_op.h" @@ -40,7 +40,6 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(recv_v2); USE_NO_KERNEL_OP(c_gen_hccl_id); diff --git a/paddle/fluid/operators/collective/send_v2_op_npu_test.cc b/paddle/fluid/operators/collective/send_v2_op_npu_test.cc index b2470ab4c0570..748a4fb99b4a5 100644 --- a/paddle/fluid/operators/collective/send_v2_op_npu_test.cc +++ b/paddle/fluid/operators/collective/send_v2_op_npu_test.cc @@ -26,8 +26,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" #include "paddle/fluid/operators/collective/send_v2_op.h" @@ -39,7 +39,6 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(send_v2); USE_NO_KERNEL_OP(c_gen_hccl_id); diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc index eeb410eba2b4c..f961e479ce47c 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/controlflow/conditional_block_op.h" #include "paddle/fluid/operators/assign_op.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -245,7 +245,7 @@ class ConditionalBlockGradOp : public ConditionalOp { outside_tensor->mutable_data(place, input_tensor.type()); const platform::DeviceContext *dev_ctx = platform::DeviceContextPool::Instance().Get(place); - math::set_constant(*dev_ctx, outside_tensor, 0.0f); + pten::funcs::set_constant(*dev_ctx, outside_tensor, 0.0f); outside_tensor->set_lod(input_tensor.lod()); } }; diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu index cbe78d9a25b50..df8ff56de9fad 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu +++ b/paddle/fluid/operators/conv_cudnn_op.cu @@ -858,7 +858,7 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel { auto dX = ctx.Output("DInput"); if (ddO) { ddO->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, ddO, static_cast(0)); } if (dW) { diff --git a/paddle/fluid/operators/conv_op.h b/paddle/fluid/operators/conv_op.h index 94d1f707b74c2..fb22765d76ea6 100644 --- a/paddle/fluid/operators/conv_op.h +++ b/paddle/fluid/operators/conv_op.h @@ -485,7 +485,7 @@ class GemmConvGradKernel : public framework::OpKernel { col_matrix.Resize(col_matrix_shape); } - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto blas = math::GetBlas(dev_ctx); if (input_grad) { @@ -692,7 +692,7 @@ class GemmConvDoubleGradKernel : public framework::OpKernel { col_matrix.Resize(col_matrix_shape); } - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto blas = math::GetBlas(dev_ctx); // dx convolution double grad: gemm + col2im(col2vol) @@ -991,7 +991,7 @@ class DepthwiseConvGradKernel : public framework::OpKernel { paddings.erase(paddings.begin() + i + 1); } } - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); if (input_grad) { diff --git a/paddle/fluid/operators/conv_shift_op.cu b/paddle/fluid/operators/conv_shift_op.cu index 2289104d2dbfb..aca3bf9ae2749 100644 --- a/paddle/fluid/operators/conv_shift_op.cu +++ b/paddle/fluid/operators/conv_shift_op.cu @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/conv_shift_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -163,7 +163,7 @@ class ConvShiftGradKernel auto &device_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; const int x_per_block = 256; int num_x_blocks = DivUp(x_width, x_per_block); diff --git a/paddle/fluid/operators/conv_transpose_cudnn_op.cu b/paddle/fluid/operators/conv_transpose_cudnn_op.cu index 19c0be44a1d0b..32792d6d47fd5 100644 --- a/paddle/fluid/operators/conv_transpose_cudnn_op.cu +++ b/paddle/fluid/operators/conv_transpose_cudnn_op.cu @@ -21,8 +21,8 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/conv_cudnn_helper.h" #endif #include "paddle/fluid/operators/conv_transpose_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/padding.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -34,7 +34,7 @@ static void DataTranspose(const framework::ExecutionContext& ctx, const Tensor* input, Tensor* output, const std::vector& axis, int flag = 0) { auto& dev_ctx = ctx.template device_context(); - math::Transpose transpose; + pten::funcs::Transpose transpose; auto in_dims = input->dims(); std::vector input_transpose_vec; for (size_t i = 0; i < axis.size(); ++i) { @@ -650,7 +650,7 @@ class CUDNNConvTransposeDoubleGradOpKernel : public framework::OpKernel { if (ddO) { ddO->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, ddO, static_cast(0)); } if (dW) { diff --git a/paddle/fluid/operators/conv_transpose_op.h b/paddle/fluid/operators/conv_transpose_op.h index b8335c7506428..7b1fb6901e39b 100644 --- a/paddle/fluid/operators/conv_transpose_op.h +++ b/paddle/fluid/operators/conv_transpose_op.h @@ -226,7 +226,7 @@ class GemmConvTransposeKernel : public framework::OpKernel { filter.Resize(filter_matrix_shape); output->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); auto blas = math::GetBlas(dev_ctx); set_zero(dev_ctx, output, static_cast(0)); @@ -437,7 +437,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { col_matrix.Resize(col_matrix_shape); Tensor filter_grad_; - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; math::Im2ColFunctor im2col; math::Vol2ColFunctor vol2col; @@ -628,7 +628,7 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel { output->mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, output, static_cast(0)); math::DepthwiseConvInputGradFunctor @@ -690,7 +690,7 @@ class DepthwiseConvTransposeGradKernel : public framework::OpKernel { } if (filter_grad) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; filter_grad->mutable_data(context.GetPlace()); set_zero(dev_ctx, filter_grad, static_cast(0)); diff --git a/paddle/fluid/operators/cos_sim_op.h b/paddle/fluid/operators/cos_sim_op.h index 0b4e3f7746741..f8b984e1159a8 100644 --- a/paddle/fluid/operators/cos_sim_op.h +++ b/paddle/fluid/operators/cos_sim_op.h @@ -15,8 +15,8 @@ limitations under the License. 
 #pragma once
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/cos_sim_functor.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 namespace paddle {
 namespace operators {
@@ -121,7 +121,7 @@ class CosSimGradKernel : public framework::OpKernel<T> {
       if (out_grad_y) {
         out_grad_y->Resize(in_y->dims());
         out_grad_y->mutable_data<T>(context.GetPlace());
-        math::SetConstant<DeviceContext, T> set_zero;
+        pten::funcs::SetConstant<DeviceContext, T> set_zero;
         auto& dev_ctx = context.template device_context<DeviceContext>();
         set_zero(dev_ctx, out_grad_y, static_cast<T>(0));
diff --git a/paddle/fluid/operators/crf_decoding_op.h b/paddle/fluid/operators/crf_decoding_op.h
index 33108251b3b46..8ca819de06c97 100644
--- a/paddle/fluid/operators/crf_decoding_op.h
+++ b/paddle/fluid/operators/crf_decoding_op.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/jit/kernels.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 namespace paddle {
 namespace operators {
@@ -36,7 +36,7 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
     auto* decoded_path = ctx.Output<Tensor>("ViterbiPath");
 
     int64_t* path = decoded_path->mutable_data<int64_t>(platform::CPUPlace());
-    math::SetConstant<DeviceContext, int64_t>()(
+    pten::funcs::SetConstant<DeviceContext, int64_t>()(
         ctx.template device_context<DeviceContext>(), decoded_path, 0);
 
     bool has_length = ctx.HasInput("Length");
diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h
index 8424fc4376fd7..19ab6afd7fb1f 100644
--- a/paddle/fluid/operators/cross_entropy_op.h
+++ b/paddle/fluid/operators/cross_entropy_op.h
@@ -17,8 +17,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math.h"
 #include "paddle/fluid/operators/math/cross_entropy.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/ctc_align_op.cu b/paddle/fluid/operators/ctc_align_op.cu
index 67bd71d4a1be3..bd0b0ac0bc957 100644
--- a/paddle/fluid/operators/ctc_align_op.cu
+++ b/paddle/fluid/operators/ctc_align_op.cu
@@ -128,7 +128,7 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel<T> {
       if (host_out_lod0.back() == 0) {
         output->Resize({1, 1});
         output->mutable_data<T>(ctx.GetPlace());
-        math::SetConstant<platform::CUDADeviceContext, T> set_constant;
+        pten::funcs::SetConstant<platform::CUDADeviceContext, T> set_constant;
         set_constant(ctx.template device_context<platform::CUDADeviceContext>(),
                      output, -1);
diff --git a/paddle/fluid/operators/ctc_align_op.h b/paddle/fluid/operators/ctc_align_op.h
index 662f899c0a593..b79c3aeac4957 100644
--- a/paddle/fluid/operators/ctc_align_op.h
+++ b/paddle/fluid/operators/ctc_align_op.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #include <string.h>
 #include <vector>
 
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/cudnn_lstm_op.cu.cc b/paddle/fluid/operators/cudnn_lstm_op.cu.cc
index 8adf556b4cd3d..5c899ac557f52 100644
--- a/paddle/fluid/operators/cudnn_lstm_op.cu.cc
+++ b/paddle/fluid/operators/cudnn_lstm_op.cu.cc
@@ -14,8 +14,8 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/generator.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/utils.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/fluid/operators/cudnn_lstm_cache.h"
 #endif
@@ -366,7 +366,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
     }
 
     Tensor weight_grad;
-    math::SetConstant<paddle::platform::CUDADeviceContext, T> zero;
+    pten::funcs::SetConstant<paddle::platform::CUDADeviceContext, T> zero;
     weight_grad.mutable_data<T>({weight_numel}, ctx.GetPlace());
     zero(dev_ctx, &weight_grad, static_cast<T>(0.0));
     T *weight_grad_data = weight_grad.data<T>();
diff --git a/paddle/fluid/operators/cvm_op.cc b/paddle/fluid/operators/cvm_op.cc
index be7d4780f83ae..a84357b6e43d3 100644
--- a/paddle/fluid/operators/cvm_op.cc
+++ b/paddle/fluid/operators/cvm_op.cc
@@ -14,7 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/cvm_op.h"
 #include <memory>
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/deformable_conv_filter.cu.h b/paddle/fluid/operators/deformable_conv_filter.cu.h
index f466d1803f819..75d16ae0d43db 100644
--- a/paddle/fluid/operators/deformable_conv_filter.cu.h
+++ b/paddle/fluid/operators/deformable_conv_filter.cu.h
@@ -23,7 +23,7 @@
 #pragma once
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 template <typename T>
 __global__ void FilterGradAddupCUDAKernel(const int nthreads, const int n,
diff --git a/paddle/fluid/operators/deformable_conv_func.h b/paddle/fluid/operators/deformable_conv_func.h
index 99d1d7c4776c3..134a1ea06d946 100644
--- a/paddle/fluid/operators/deformable_conv_func.h
+++ b/paddle/fluid/operators/deformable_conv_func.h
@@ -23,8 +23,8 @@
 #pragma once
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/pten/core/hostdevice.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 template <typename T>
 HOSTDEVICE T DmcnGetGradientWeight(T argmax_h, T argmax_w, const int h,
diff --git a/paddle/fluid/operators/deformable_conv_op.cu b/paddle/fluid/operators/deformable_conv_op.cu
index 924adafa4b8d8..97d2f71758fb5 100644
--- a/paddle/fluid/operators/deformable_conv_op.cu
+++ b/paddle/fluid/operators/deformable_conv_op.cu
@@ -26,8 +26,8 @@
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/deformable_conv_op.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 
 namespace paddle {
 namespace operators {
@@ -623,7 +623,7 @@ class DeformableConvGradCUDAKernel : public framework::OpKernel<T> {
     Tensor col_buffer_3d;
     col_buffer_3d.ShareDataWith(col_buffer).Resize(col_buffer_3d_shape);
 
-    math::SetConstant<platform::CUDADeviceContext, T> set_zero;
+    pten::funcs::SetConstant<platform::CUDADeviceContext, T> set_zero;
     auto blas = math::GetBlas<platform::CUDADeviceContext, T>(dev_ctx);
 
     col_buffer.mutable_data<T>(ctx.GetPlace());
diff --git a/paddle/fluid/operators/deformable_conv_op.h b/paddle/fluid/operators/deformable_conv_op.h
index 4be98f3e6c092..a5c0404ed3a5d 100644
--- a/paddle/fluid/operators/deformable_conv_op.h
+++ b/paddle/fluid/operators/deformable_conv_op.h
@@ -27,7 +27,7 @@
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/deformable_conv_func.h"
 #include "paddle/fluid/operators/math/blas.h"
"paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -489,7 +489,7 @@ class DeformableConvGradCPUKernel : public framework::OpKernel { Tensor col_buffer_3d; col_buffer_3d.ShareDataWith(col_buffer).Resize(col_buffer_3d_shape); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto blas = math::GetBlas(dev_ctx); col_buffer.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/deformable_conv_v1_op.cu b/paddle/fluid/operators/deformable_conv_v1_op.cu index c252700528c49..8f6c5a226bc86 100644 --- a/paddle/fluid/operators/deformable_conv_v1_op.cu +++ b/paddle/fluid/operators/deformable_conv_v1_op.cu @@ -29,8 +29,8 @@ #include "paddle/fluid/operators/deformable_conv_func.h" #include "paddle/fluid/operators/deformable_conv_v1_op.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -489,7 +489,7 @@ class DeformableConvV1GradCUDAKernel : public framework::OpKernel { Tensor col_buffer_3d; col_buffer_3d.ShareDataWith(col_buffer).Resize(col_buffer_3d_shape); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto blas = math::GetBlas(dev_ctx); col_buffer.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/deformable_conv_v1_op.h b/paddle/fluid/operators/deformable_conv_v1_op.h index 92b19e390466a..1ddc31c93eaaa 100644 --- a/paddle/fluid/operators/deformable_conv_v1_op.h +++ b/paddle/fluid/operators/deformable_conv_v1_op.h @@ -28,7 +28,7 @@ #include "paddle/fluid/operators/deformable_conv_func.h" #include "paddle/fluid/operators/deformable_conv_op.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -451,7 +451,7 @@ class DeformableConvV1GradCPUKernel : public framework::OpKernel { Tensor col_buffer_3d; col_buffer_3d.ShareDataWith(col_buffer).Resize(col_buffer_3d_shape); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto blas = math::GetBlas(dev_ctx); col_buffer.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cu b/paddle/fluid/operators/deformable_psroi_pooling_op.cu index eeb2c7692b5d5..95f05963cd1f6 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.cu +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cu @@ -31,8 +31,8 @@ #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/deformable_psroi_pooling_op.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -436,7 +436,7 @@ class DeformablePSROIPoolGradCUDAKernel : public framework::OpKernel { Tensor* input_grad = ctx.Output(framework::GradVarName("Input")); Tensor* trans_grad = ctx.Output(framework::GradVarName("Trans")); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.cuda_device_context(); if (input_grad) { input_grad->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.h 
b/paddle/fluid/operators/deformable_psroi_pooling_op.h index a986f915e261b..08b8342a1fd69 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.h +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.h @@ -27,7 +27,7 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -165,7 +165,7 @@ class DeformablePSROIPoolCPUKernel : public framework::OpKernel { auto* top_count = ctx.Output("TopCount"); top_count->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); set_zero(dev_ctx, out, static_cast(0)); set_zero(dev_ctx, top_count, static_cast(0)); @@ -421,7 +421,7 @@ class DeformablePSROIPoolGradCPUKernel : public framework::OpKernel { auto* top_count = ctx.Input("TopCount"); auto* output_grad = ctx.Input(framework::GradVarName("Output")); auto* input_grad = ctx.Output(framework::GradVarName("Input")); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); if (input_grad) { input_grad->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/detection/anchor_generator_op.h b/paddle/fluid/operators/detection/anchor_generator_op.h index 599f6935736f9..f888787cf51ae 100644 --- a/paddle/fluid/operators/detection/anchor_generator_op.h +++ b/paddle/fluid/operators/detection/anchor_generator_op.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/transform.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/bbox_util.cu.h b/paddle/fluid/operators/detection/bbox_util.cu.h index c6754f62cc74e..c4ae795a5078a 100644 --- a/paddle/fluid/operators/detection/bbox_util.cu.h +++ b/paddle/fluid/operators/detection/bbox_util.cu.h @@ -24,9 +24,9 @@ limitations under the License. */ namespace cub = hipcub; #endif #include "paddle/fluid/operators/gather.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/bipartite_match_op.cc b/paddle/fluid/operators/detection/bipartite_match_op.cc index 5cd853758926e..582f81d71aa60 100644 --- a/paddle/fluid/operators/detection/bipartite_match_op.cc +++ b/paddle/fluid/operators/detection/bipartite_match_op.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
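Each of these hunks rewrites the same zero-initialization idiom: declare a `SetConstant` functor (templated on device context and element type) and invoke it on a freshly allocated tensor. A minimal sketch of the call site after the rename, assuming `pten::funcs::SetConstant` keeps the `<DeviceContext, T>` parameters and the `(ctx, tensor, value)` call operator of the old `math::SetConstant`:

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/pten/kernels/funcs/math_function.h"

// Zero an output buffer before accumulation, as the deformable_conv and
// deformable_psroi_pooling grad kernels above do (hypothetical helper).
template <typename T>
void ZeroTensor(const paddle::platform::CPUDeviceContext& dev_ctx,
                paddle::framework::Tensor* out) {
  pten::funcs::SetConstant<paddle::platform::CPUDeviceContext, T> set_zero;
  set_zero(dev_ctx, out, static_cast<T>(0));
}

On GPU builds the same functor is instantiated with `platform::CUDADeviceContext`; only the namespace changes in these hunks, not the call shape.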
*/ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -205,9 +205,9 @@ class BipartiteMatchKernel : public framework::OpKernel { match_indices->mutable_data({n, col}, context.GetPlace()); match_dist->mutable_data({n, col}, context.GetPlace()); - math::SetConstant iset; + pten::funcs::SetConstant iset; iset(dev_ctx, match_indices, static_cast(-1)); - math::SetConstant tset; + pten::funcs::SetConstant tset; tset(dev_ctx, match_dist, static_cast(0)); int* indices = match_indices->data(); diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu index 53727d9d08747..24f5f00b07727 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cu +++ b/paddle/fluid/operators/detection/box_clip_op.cu @@ -14,9 +14,9 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/box_clip_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/pten/core/hostdevice.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/box_clip_op.h b/paddle/fluid/operators/detection/box_clip_op.h index e24cefdcd7b72..5c1870e902334 100644 --- a/paddle/fluid/operators/detection/box_clip_op.h +++ b/paddle/fluid/operators/detection/box_clip_op.h @@ -13,7 +13,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/bbox_util.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/box_coder_op.h b/paddle/fluid/operators/detection/box_coder_op.h index d120ebbeb4de5..b4fe27401db08 100644 --- a/paddle/fluid/operators/detection/box_coder_op.h +++ b/paddle/fluid/operators/detection/box_coder_op.h @@ -13,7 +13,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h index e66a8351f4761..1fe05e6ebbffb 100644 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h @@ -14,7 +14,7 @@ limitations under the License. 
*/ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu index eddb25d57b47c..70cbd7a9dea26 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu @@ -195,7 +195,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { Tensor length_lod; int* length_lod_data = length_lod.mutable_data({lod_size}, dev_ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, &length_lod, static_cast(0)); int blocks = NumBlocks(real_post_num); diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h index 950b8b78933bf..984b6332918a0 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h @@ -22,7 +22,7 @@ limitations under the License.*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu index 355a35d4dd21b..84d564ac4e94c 100644 --- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu @@ -25,9 +25,9 @@ namespace cub = hipcub; #include "paddle/fluid/operators/detection/bbox_util.h" #include "paddle/fluid/operators/detection/distribute_fpn_proposals_op.h" #include "paddle/fluid/operators/gather.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -121,7 +121,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel { Tensor sub_lod_list; sub_lod_list.Resize({num_level, lod_size}); int* sub_lod_list_data = sub_lod_list.mutable_data(dev_ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, &sub_lod_list, static_cast(0)); Tensor target_lvls; diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h index f1b454913f742..e96804ab6f641 100644 --- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h @@ -21,7 +21,7 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc index afa4ccf25d00f..92dba742f4cdf 100644 --- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_mask_labels_op.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/operators/detection/mask_util.h" #include "paddle/fluid/operators/gather.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -122,7 +122,7 @@ static inline void ExpandMaskTarget(const platform::CPUDeviceContext& ctx, int* mask_targets_data = mask_targets->mutable_data({num_mask, mask_dim}, ctx.GetPlace()); - math::set_constant(ctx, mask_targets, -1); + pten::funcs::set_constant(ctx, mask_targets, -1); for (int64_t mask_id = 0; mask_id < num_mask; ++mask_id) { int cls = mask_class_labels_data[mask_id]; int start = M * cls; @@ -271,7 +271,7 @@ std::vector SampleMaskForOneImage( } masks.mutable_data({bg_num, resolution * resolution}, ctx.GetPlace()); - math::set_constant(ctx, &masks, -1); + pten::funcs::set_constant(ctx, &masks, -1); int* mask_class_labels_data = mask_class_labels.mutable_data({bg_num, 1}, ctx.GetPlace()); mask_class_labels_data[0] = 0; diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc index 1b1fa7b064f54..67a1d2c5acf1f 100644 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc @@ -18,7 +18,7 @@ limitations under the License. 
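The mask-label hunks below use the lower-case `set_constant` helper rather than the functor; it dispatches on the tensor's runtime dtype, so the caller passes a plain numeric literal (`-1` here) with no template arguments. A sketch under the assumption that `pten::funcs::set_constant` keeps the `(context, tensor*, value)` signature visible in these hunks:

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/pten/kernels/funcs/math_function.h"

// Mark every mask target as "ignore" (-1) before scattering real labels,
// mirroring ExpandMaskTarget below (hypothetical helper).
void FillIgnoreLabels(const paddle::platform::CPUDeviceContext& ctx,
                      paddle::framework::Tensor* mask_targets) {
  // Dispatches on mask_targets' dtype; no <DeviceContext, T> spelling needed.
  pten::funcs::set_constant(ctx, mask_targets, -1);
}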
*/ #include "paddle/fluid/operators/detection/bbox_util.h" #include "paddle/fluid/operators/gather.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -289,7 +289,7 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context, fg_labels.mutable_data({fg_num}, context.GetPlace()); CPUGather(context, gt_classes, gt_label_inds_t, &fg_labels); bg_labels.mutable_data({bg_num}, context.GetPlace()); - math::set_constant(context, &bg_labels, 0); + pten::funcs::set_constant(context, &bg_labels, 0); Concat(context, fg_labels, bg_labels, sampled_labels); Tensor fg_max_overlap, bg_max_overlap; @@ -328,7 +328,7 @@ std::vector SampleRoisForOneImage( Tensor roi_filter; // Tensor box_filter; if (keep.numel() == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; roi_filter.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); set_zero(context, &roi_filter, static_cast(0)); } else { @@ -403,9 +403,9 @@ std::vector SampleRoisForOneImage( bbox_targets.mutable_data(bbox_expand_dim, context.GetPlace()); bbox_inside_weights.mutable_data(bbox_expand_dim, context.GetPlace()); bbox_outside_weights.mutable_data(bbox_expand_dim, context.GetPlace()); - math::set_constant(context, &bbox_targets, 0.0); - math::set_constant(context, &bbox_inside_weights, 0.0); - math::set_constant(context, &bbox_outside_weights, 0.0); + pten::funcs::set_constant(context, &bbox_targets, 0.0); + pten::funcs::set_constant(context, &bbox_inside_weights, 0.0); + pten::funcs::set_constant(context, &bbox_outside_weights, 0.0); auto* bbox_targets_single_data = bbox_targets_single.data(); auto* sampled_labels_data = sampled_labels.data(); diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc index bc48c3b5ba17a..570720550bf8a 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_op.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/detection/bbox_util.h" #include "paddle/fluid/operators/detection/nms_util.h" #include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -112,7 +112,7 @@ class GenerateProposalsKernel : public framework::OpKernel { scores_swap.mutable_data({num, h_score, w_score, c_score}, dev_ctx.GetPlace()); - math::Transpose trans; + pten::funcs::Transpose trans; std::vector axis = {0, 2, 3, 1}; trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis); trans(dev_ctx, *scores, &scores_swap, axis); @@ -211,7 +211,7 @@ class GenerateProposalsKernel : public framework::OpKernel { FilterBoxes(ctx, &proposals, min_size, im_info_slice, true, &keep); // Handle the case when there is no keep index left if (keep.numel() == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; bbox_sel.mutable_data({1, 4}, ctx.GetPlace()); set_zero(ctx, &bbox_sel, static_cast(0)); Tensor scores_filter; diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu index 2de06e06d9ad3..f34b8e26c0d5f 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_op.cu @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/detection/bbox_util.cu.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -76,7 +76,7 @@ static std::pair ProposalForOneImage( Tensor scores_filter, proposals_filter; // Handle the case when there is no keep index left if (keep_num == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; proposals_filter.mutable_data({1, 4}, ctx.GetPlace()); scores_filter.mutable_data({1, 1}, ctx.GetPlace()); set_zero(ctx, &proposals_filter, static_cast(0)); @@ -154,7 +154,7 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel { scores_swap.mutable_data({num, h_score, w_score, c_score}, dev_ctx.GetPlace()); - math::Transpose trans; + pten::funcs::Transpose trans; std::vector axis = {0, 2, 3, 1}; trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis); trans(dev_ctx, *scores, &scores_swap, axis); diff --git a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc index 44554a941dce4..671a27429f283 100644 --- a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/detection/bbox_util.h" #include "paddle/fluid/operators/detection/nms_util.h" #include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -113,7 +113,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel { scores_swap.mutable_data({num, h_score, w_score, c_score}, dev_ctx.GetPlace()); - math::Transpose trans; + pten::funcs::Transpose trans; std::vector axis = {0, 2, 3, 1}; trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis); trans(dev_ctx, *scores, &scores_swap, axis); @@ -215,7 +215,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel { pixel_offset); // Handle the case when there is no keep index left if (keep.numel() == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; bbox_sel.mutable_data({1, 4}, ctx.GetPlace()); set_zero(ctx, &bbox_sel, static_cast(0)); Tensor scores_filter; diff --git a/paddle/fluid/operators/detection/generate_proposals_v2_op.cu b/paddle/fluid/operators/detection/generate_proposals_v2_op.cu index cc2d4578e3eb1..98108a25dade9 100644 --- a/paddle/fluid/operators/detection/generate_proposals_v2_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_v2_op.cu @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/detection/bbox_util.cu.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -77,7 +77,7 @@ static std::pair ProposalForOneImage( Tensor scores_filter, proposals_filter; // Handle the case when there is no keep index left if (keep_num == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; proposals_filter.mutable_data({1, 4}, ctx.GetPlace()); scores_filter.mutable_data({1, 1}, ctx.GetPlace()); set_zero(ctx, &proposals_filter, static_cast(0)); @@ -157,7 +157,7 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel { scores_swap.mutable_data({num, h_score, w_score, c_score}, dev_ctx.GetPlace()); - math::Transpose trans; + pten::funcs::Transpose trans; std::vector axis = {0, 2, 3, 1}; trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis); trans(dev_ctx, *scores, &scores_swap, axis); diff --git a/paddle/fluid/operators/detection/prior_box_op.h b/paddle/fluid/operators/detection/prior_box_op.h index 21ac74f25cb7e..94413c9c83544 100644 --- a/paddle/fluid/operators/detection/prior_box_op.h +++ b/paddle/fluid/operators/detection/prior_box_op.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/transform.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc index 4d0c9da2eebe6..777e69ab7b4b9 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cu b/paddle/fluid/operators/detection/roi_perspective_transform_op.cu index fbf631f75b61f..ff8da478a00f7 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cu +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cu @@ -14,9 +14,9 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" using paddle::platform::PADDLE_CUDA_NUM_THREADS; using paddle::platform::float16; @@ -356,7 +356,7 @@ class CUDAROIPerspectiveTransformOpKernel : public framework::OpKernel { T* out2in_w_data = out2in_w->mutable_data({out->numel(), 4}, ctx.GetPlace()); - math::SetConstant init; + pten::funcs::SetConstant init; init(ctx.cuda_device_context(), out2in_idx, static_cast(-1)); auto transformed_height = ctx.Attr("transformed_height"); @@ -482,7 +482,7 @@ class CUDAROIPerspectiveTransformGradOpKernel : public framework::OpKernel { T* in_grad_data = in_grad->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(ctx.cuda_device_context(), in_grad, static_cast(0)); const T* out_grad_data = out_grad->data(); diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc index 2a16e20c2a723..cf7afc3853d4d 100644 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/bbox_util.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/yolo_box_op.cu b/paddle/fluid/operators/detection/yolo_box_op.cu index bfe4742c4b3c3..7cc66f2074df0 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.cu +++ b/paddle/fluid/operators/detection/yolo_box_op.cu @@ -14,8 +14,8 @@ limitations under the License. 
*/ #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/detection/yolo_box_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -114,7 +114,7 @@ class YoloBoxOpCUDAKernel : public framework::OpKernel { T* boxes_data = boxes->mutable_data({n, box_num, 4}, ctx.GetPlace()); T* scores_data = scores->mutable_data({n, box_num, class_num}, ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, boxes, static_cast(0)); set_zero(dev_ctx, scores, static_cast(0)); platform::GpuLaunchConfig config = diff --git a/paddle/fluid/operators/detection/yolo_box_op.h b/paddle/fluid/operators/detection/yolo_box_op.h index 31a67ecc26635..27fe31587e4b0 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.h +++ b/paddle/fluid/operators/detection/yolo_box_op.h @@ -13,8 +13,8 @@ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/core/hostdevice.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.h b/paddle/fluid/operators/detection/yolov3_loss_op.h index 1acfb2cf4e50f..1ab3039b2e856 100644 --- a/paddle/fluid/operators/detection/yolov3_loss_op.h +++ b/paddle/fluid/operators/detection/yolov3_loss_op.h @@ -13,7 +13,7 @@ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -305,7 +305,7 @@ class Yolov3LossKernel : public framework::OpKernel { Tensor gtscore; if (!gt_score) { gtscore.mutable_data({n, b}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), >score, static_cast(1.0)); gt_score = >score; @@ -461,7 +461,7 @@ class Yolov3LossGradKernel : public framework::OpKernel { Tensor gtscore; if (!gt_score) { gtscore.mutable_data({n, b}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), >score, static_cast(1.0)); gt_score = >score; diff --git a/paddle/fluid/operators/determinant_op.h b/paddle/fluid/operators/determinant_op.h index 4c17869fb5d2a..90443e0928ba2 100644 --- a/paddle/fluid/operators/determinant_op.h +++ b/paddle/fluid/operators/determinant_op.h @@ -150,7 +150,7 @@ inline bool CheckMatrixInvertible(const framework::ExecutionContext& ctx, auto* data = dev_tensor.mutable_data({1}, ctx.GetPlace()); // set false - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, &dev_tensor, false); // find whether zero @@ -208,7 +208,7 @@ class DeterminantGradKernel : public framework::OpKernel { VLOG(3) << "The input matrix not invertible!"; ddet->Resize(input->dims()); ddet->mutable_data(context.GetPlace()); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, ddet, static_cast(0.0f)); return; } @@ -363,7 +363,7 @@ class SlogDeterminantGradKernel : public framework::OpKernel { VLOG(3) << "The input matrix not invertible!"; dslogdet->Resize(input->dims()); dslogdet->mutable_data(context.GetPlace()); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, dslogdet, std::numeric_limits::quiet_NaN()); return; } diff --git a/paddle/fluid/operators/dgc_op.h 
b/paddle/fluid/operators/dgc_op.h index 12ffc948336c3..4a81537b8c8be 100644 --- a/paddle/fluid/operators/dgc_op.h +++ b/paddle/fluid/operators/dgc_op.h @@ -187,7 +187,7 @@ class DGCOpKernel : public framework::OpKernel { "V_out numel error, V_out numel is %d.", v_out->numel())); } - math::SetConstant tset; + pten::funcs::SetConstant tset; tset(dev_ctx, grad_out, static_cast(0)); } }; diff --git a/paddle/fluid/operators/diag_embed_op.h b/paddle/fluid/operators/diag_embed_op.h index aff7d7e48a8d4..922140b5b8096 100644 --- a/paddle/fluid/operators/diag_embed_op.h +++ b/paddle/fluid/operators/diag_embed_op.h @@ -17,8 +17,8 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -70,7 +70,7 @@ class DiagEmbedKernel : public framework::OpKernel { auto* input_data = input->data(); T* out_data = out->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); set_zero(dev_ctx, out, static_cast(0.0)); diff --git a/paddle/fluid/operators/diag_op.h b/paddle/fluid/operators/diag_op.h index f89415ae08974..09723e6df6bdc 100644 --- a/paddle/fluid/operators/diag_op.h +++ b/paddle/fluid/operators/diag_op.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -45,7 +45,7 @@ class DiagKernel : public framework::OpKernel { auto* out = context.Output("Out"); T* out_data = out->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); set_zero(dev_ctx, out, static_cast(0)); diff --git a/paddle/fluid/operators/diag_v2_op.cc b/paddle/fluid/operators/diag_v2_op.cc index dd5ad739506e0..3e74c7aa8104e 100644 --- a/paddle/fluid/operators/diag_v2_op.cc +++ b/paddle/fluid/operators/diag_v2_op.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/diag_v2_op.h" #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -109,7 +109,7 @@ class DiagV2Kernel : public framework::OpKernel { int64_t i; if (x_dims.size() == 1) { float padding_value = context.Attr("padding_value"); - math::SetConstant set_padding_value; + pten::funcs::SetConstant set_padding_value; auto& dev_ctx = context.template device_context(); set_padding_value(dev_ctx, out, static_cast(padding_value)); diff --git a/paddle/fluid/operators/diag_v2_op.cu b/paddle/fluid/operators/diag_v2_op.cu index 12ea31945f8d0..02e531765ce87 100644 --- a/paddle/fluid/operators/diag_v2_op.cu +++ b/paddle/fluid/operators/diag_v2_op.cu @@ -72,7 +72,7 @@ class DiagV2CUDAKernel : public framework::OpKernel { if (x_dims.size() == 1) { float padding_value = context.Attr("padding_value"); - math::SetConstant set_padding_value; + pten::funcs::SetConstant set_padding_value; set_padding_value(dev_ctx, out, static_cast(padding_value)); auto x_length = x_dims[0]; diff --git a/paddle/fluid/operators/diag_v2_op.h b/paddle/fluid/operators/diag_v2_op.h index 7850def06117f..0d1d6cd86e440 100644 --- a/paddle/fluid/operators/diag_v2_op.h +++ b/paddle/fluid/operators/diag_v2_op.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/dist_op.h b/paddle/fluid/operators/dist_op.h index 6a34ef48a169d..2d4620eca7228 100644 --- a/paddle/fluid/operators/dist_op.h +++ b/paddle/fluid/operators/dist_op.h @@ -19,7 +19,7 @@ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -171,7 +171,7 @@ static void DistGradFunction(const framework::ExecutionContext& context) { // 1: Lp-norm(z), z = x-y, compute dz if (p == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); set_zero(dev_ctx, &grad, static_cast(0)); } else if (p == INFINITY || p == -INFINITY) { diff --git a/paddle/fluid/operators/dropout_op_test.cc b/paddle/fluid/operators/dropout_op_test.cc index 5c9be588419e3..a268ef95e33e9 100644 --- a/paddle/fluid/operators/dropout_op_test.cc +++ b/paddle/fluid/operators/dropout_op_test.cc @@ -25,12 +25,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(dropout); diff --git a/paddle/fluid/operators/edit_distance_op.cu b/paddle/fluid/operators/edit_distance_op.cu index 3096795f3eaf0..be6534365e5d7 100644 --- a/paddle/fluid/operators/edit_distance_op.cu +++ b/paddle/fluid/operators/edit_distance_op.cu @@ -16,9 +16,9 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/edit_distance_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -118,7 +118,7 @@ class EditDistanceGPUKernel : public framework::OpKernel { } const size_t num_strs = hyp_lod.size() - 1; - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; set_constant(ctx.template device_context(), sequence_num, static_cast(num_strs)); diff --git a/paddle/fluid/operators/eig_op.h b/paddle/fluid/operators/eig_op.h index b9a3cb300b4c2..4dd5b7cfd8499 100644 --- a/paddle/fluid/operators/eig_op.h +++ b/paddle/fluid/operators/eig_op.h @@ -19,11 +19,11 @@ #include #include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/math/lapack_function.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/matrix_solve.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define EPSILON 1e-6 namespace paddle { diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h index 34d40c741f038..57b47d436da57 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_function.h +++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h @@ -47,8 +47,8 @@ limitations under the License. */ #endif -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define DIVUP(x, y) (((x) + (y)-1) / (y)) diff --git a/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc b/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc index 3cd9729d3443c..63ec5bd4a2805 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc +++ b/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(elementwise_add); USE_OP_DEVICE_KERNEL(elementwise_add, NPU); diff --git a/paddle/fluid/operators/expand_op_npu_test.cc b/paddle/fluid/operators/expand_op_npu_test.cc index 7de2bf2e6990d..4e18cc73d290f 100644 --- a/paddle/fluid/operators/expand_op_npu_test.cc +++ b/paddle/fluid/operators/expand_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(expand); USE_OP_DEVICE_KERNEL(expand, NPU); diff --git a/paddle/fluid/operators/exponential_op.h b/paddle/fluid/operators/exponential_op.h index d8cafb8ef7f02..88c891d8bff56 100644 --- a/paddle/fluid/operators/exponential_op.h +++ b/paddle/fluid/operators/exponential_op.h @@ -18,7 +18,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/distribution_helper.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -32,7 +32,7 @@ class ExponentialGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto& dev_ctx = ctx.template device_context(); functor(dev_ctx, dx, static_cast(0)); } diff --git a/paddle/fluid/operators/eye_op.h b/paddle/fluid/operators/eye_op.h index d5ad27596d6ba..1aa22e74f753d 100644 --- a/paddle/fluid/operators/eye_op.h +++ b/paddle/fluid/operators/eye_op.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -47,7 +47,7 @@ class EyeKernel : public framework::OpKernel { auto* out_tensor = ctx.Output("Out"); T* out_data = out_tensor->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); set_zero(dev_ctx, out_tensor, static_cast(0)); diff --git a/paddle/fluid/operators/feed_forward_test.cu b/paddle/fluid/operators/feed_forward_test.cu index dea427393b175..551d8ee6592df 100644 --- a/paddle/fluid/operators/feed_forward_test.cu +++ b/paddle/fluid/operators/feed_forward_test.cu @@ -20,8 +20,8 @@ limitations under the License. */ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/fused/attn_feed_forward.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace framework = paddle::framework; namespace platform = paddle::platform; diff --git a/paddle/fluid/operators/fill_any_op.h b/paddle/fluid/operators/fill_any_op.h index f483e05a08fd6..a476b7a0a6ef9 100644 --- a/paddle/fluid/operators/fill_any_op.h +++ b/paddle/fluid/operators/fill_any_op.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -41,7 +41,7 @@ class FillAnyKernel : public framework::OpKernel { out->mutable_data(ctx.GetPlace()); auto &dev_ctx = ctx.template device_context(); - math::SetConstant functor; + pten::funcs::SetConstant functor; functor(reinterpret_cast(dev_ctx), out, static_cast(fill_var)); } @@ -55,7 +55,7 @@ class FillAnyGradKernel : public framework::OpKernel { if (dx) { dx->mutable_data(ctx.GetPlace()); auto &dev_ctx = ctx.template device_context(); - math::SetConstant functor; + pten::funcs::SetConstant functor; functor(reinterpret_cast(dev_ctx), dx, T(0)); } } diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.h b/paddle/fluid/operators/fill_constant_batch_size_like_op.h index 4c90daa39f940..ed3a6618977f5 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op.h +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -60,7 +60,7 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel { bool cpu_place = force_cpu || ctx.GetPlace() == platform::CPUPlace(); if (cpu_place) { auto &dev_ctx = *pool.Get(platform::CPUPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; out->mutable_data(platform::CPUPlace(), data_type); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); @@ -68,7 +68,7 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (!cpu_place) { auto &dev_ctx = *pool.Get(ctx.GetPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; out->mutable_data(ctx.GetPlace(), data_type); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc index 6b07b021d13a1..98e03ea66d852 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc @@ -70,7 +70,7 @@ class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel { bool cpu_place = force_cpu || ctx.GetPlace() == platform::CPUPlace(); if (cpu_place) { auto &dev_ctx = *pool.Get(platform::CPUPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; out->mutable_data(platform::CPUPlace(), data_type); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); diff --git a/paddle/fluid/operators/fill_constant_op.h b/paddle/fluid/operators/fill_constant_op.h index c74cf2a824c83..15c9241275d10 100644 --- a/paddle/fluid/operators/fill_constant_op.h +++ b/paddle/fluid/operators/fill_constant_op.h @@ -21,8 +21,8 @@ limitations under the License. 
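FillConstantKernel below repeats the same three lines per placement: fetch the device context from the global pool, `mutable_data` the tensor, and run `SetConstant` after a `reinterpret_cast` to the concrete context type. A CPU-only sketch of that dispatch (the helper name and single-branch simplification are mine; the CUDA, CUDA-pinned, and XPU branches in the hunk follow the same shape behind `PADDLE_WITH_*` guards):

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/pten/kernels/funcs/math_function.h"

// Fill `tensor` with `value` on the CPU, mirroring the cpu_place branch
// of FillConstantKernel (hypothetical helper).
template <typename T>
void FillOnCPU(paddle::framework::Tensor* tensor, T value) {
  auto& pool = paddle::platform::DeviceContextPool::Instance();
  auto& dev_ctx = *pool.Get(paddle::platform::CPUPlace());
  tensor->mutable_data<T>(paddle::platform::CPUPlace());
  pten::funcs::SetConstant<paddle::platform::CPUDeviceContext, T> functor;
  functor(reinterpret_cast<const paddle::platform::CPUDeviceContext&>(dev_ctx),
          tensor, value);
}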
*/ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/utils.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -121,14 +121,14 @@ class FillConstantKernel : public framework::OpKernel { << ((data_type == framework::proto::VarType::BF16) ? "" : ""); tensor->mutable_data(platform::CPUPlace(), data_type); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(platform::CPUPlace()); functor(reinterpret_cast(dev_ctx), tensor, static_cast(value)); } else if (actual_place == 1) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) tensor->mutable_data(ctx.GetPlace(), data_type); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(ctx.GetPlace()); functor(reinterpret_cast(dev_ctx), tensor, static_cast(value)); @@ -139,7 +139,7 @@ class FillConstantKernel : public framework::OpKernel { } else if (actual_place == 2) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) tensor->mutable_data(platform::CUDAPinnedPlace(), data_type); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(platform::CUDAPinnedPlace()); functor( reinterpret_cast(dev_ctx), @@ -151,7 +151,7 @@ class FillConstantKernel : public framework::OpKernel { } else if (actual_place == 3) { #ifdef PADDLE_WITH_XPU tensor->mutable_data(ctx.GetPlace(), data_type); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(ctx.GetPlace()); functor(reinterpret_cast(dev_ctx), tensor, static_cast(value)); diff --git a/paddle/fluid/operators/fill_zeros_like_op.h b/paddle/fluid/operators/fill_zeros_like_op.h index 4bbe0df6b6890..c34358d9a3c41 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.h +++ b/paddle/fluid/operators/fill_zeros_like_op.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -26,7 +26,7 @@ class FillZerosLikeKernel : public framework::OpKernel { auto* out = context.Output("Out"); out->mutable_data(context.GetPlace()); - math::SetConstant setter; + pten::funcs::SetConstant setter; setter(context.template device_context(), out, static_cast(0)); } diff --git a/paddle/fluid/operators/flatten_op.h b/paddle/fluid/operators/flatten_op.h index 2a9c2b27d2371..15e820a9ee366 100644 --- a/paddle/fluid/operators/flatten_op.h +++ b/paddle/fluid/operators/flatten_op.h @@ -17,12 +17,12 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/pten/kernels/empty_kernel.h" #include "paddle/pten/kernels/flatten_grad_kernel.h" #include "paddle/pten/kernels/flatten_kernel.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/fold_op.h b/paddle/fluid/operators/fold_op.h index d37edbfe80375..7f2f26b464ff0 100644 --- a/paddle/fluid/operators/fold_op.h +++ b/paddle/fluid/operators/fold_op.h @@ -18,7 +18,7 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/im2col.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -64,7 +64,7 @@ class FoldOpKernel : public framework::OpKernel { framework::DDim input_matrix_shape({input_dims[0], kernel_sizes[0], kernel_sizes[1], output_height, output_width}); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, output, static_cast(0)); for (int i = 0; i < batch_size; i++) { diff --git a/paddle/fluid/operators/frame_op.h b/paddle/fluid/operators/frame_op.h index 482c6411812b6..0f34e2f7fccad 100644 --- a/paddle/fluid/operators/frame_op.h +++ b/paddle/fluid/operators/frame_op.h @@ -18,11 +18,11 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/seq2col.h" #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/fsp_op.h b/paddle/fluid/operators/fsp_op.h index 55bd23784d402..999c3ae3747e9 100644 --- a/paddle/fluid/operators/fsp_op.h +++ b/paddle/fluid/operators/fsp_op.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -79,7 +79,7 @@ class FSPGradOpKernel : public framework::OpKernel { int64_t w = 0; auto blas = math::GetBlas(context); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; if (d_x != nullptr) { d_x->mutable_data(context.GetPlace()); set_zero(context.template device_context(), d_x, diff --git a/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc b/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc index 74307c3ba7917..cd88b67a56323 100644 --- a/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc +++ b/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc @@ -22,8 +22,8 @@ limitations under the License. */ #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h" #include "paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" DECLARE_bool(cudnn_batchnorm_spatial_persistent); diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc index 425782d7900b4..bec44662a2615 100644 --- a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc +++ b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc @@ -21,8 +21,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/fused/cudnn_norm_conv.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace framework = paddle::framework; namespace platform = paddle::platform; diff --git a/paddle/fluid/operators/fused/fused_attention_op.cu b/paddle/fluid/operators/fused/fused_attention_op.cu index 581fc45e268c2..79569bb3a79c1 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_attention_op.cu @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/operators/elementwise/elementwise_add_op.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/fused/attention_layer_norm.h" #include "paddle/fluid/operators/fused/attn_gemm.h" diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_activation_op.cu index 83328caf3844f..e825ad30782ad 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cu @@ -20,10 +20,10 @@ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/fused/fused_bn_activation_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/norm_utils.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" DECLARE_bool(cudnn_batchnorm_spatial_persistent); @@ -256,7 +256,8 @@ class FusedBatchNormActGradKernel PADDLE_THROW( platform::errors::Unimplemented("Unsupported activation type")); } - math::SetConstant> + pten::funcs::SetConstant> functor; functor(dev_ctx, d_scale, static_cast>(0)); functor(dev_ctx, d_bias, static_cast>(0)); diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu index 7c124a0d6b661..c5bc5b1725516 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu @@ -19,10 +19,10 @@ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/norm_utils.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" DECLARE_bool(cudnn_batchnorm_spatial_persistent); diff --git a/paddle/fluid/operators/fused/fused_dropout_test.h b/paddle/fluid/operators/fused/fused_dropout_test.h index a0d1cd43404eb..59b997bb5149f 100644 --- a/paddle/fluid/operators/fused/fused_dropout_test.h +++ b/paddle/fluid/operators/fused/fused_dropout_test.h @@ -24,8 +24,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/layer_norm_kernel.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace framework = paddle::framework; namespace platform = paddle::platform; diff --git a/paddle/fluid/operators/gather.cu.h b/paddle/fluid/operators/gather.cu.h index 8386896027fa0..739fcc9b18400 100644 --- a/paddle/fluid/operators/gather.cu.h +++ b/paddle/fluid/operators/gather.cu.h @@ -18,10 +18,10 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/place.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -283,7 +283,7 @@ void GatherV2GradCUDAFunction(const Tensor* input, const Tensor* index, auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); auto out_dim = out->dims(); int64_t out_index_dim_size = out_dim[axis_index]; - operators::math::set_constant(*dev_ctx, out, 0.0); + pten::funcs::set_constant(*dev_ctx, out, 0.0); platform::GpuLaunchConfig config = platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), input_size); diff --git a/paddle/fluid/operators/gather.h b/paddle/fluid/operators/gather.h index 84ec587bede25..bd339c4a085b5 100644 --- a/paddle/fluid/operators/gather.h +++ b/paddle/fluid/operators/gather.h @@ -20,8 +20,8 @@ limitations under the License. */ #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/place.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -226,7 +226,7 @@ void GatherV2GradFunction(const Tensor* input, const Tensor* index, auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); auto out_dim = out->dims(); int64_t out_index_dim_size = out_dim[axis_index]; - operators::math::set_constant(*dev_ctx, out, 0.0); + pten::funcs::set_constant(*dev_ctx, out, 0.0); for (int64_t i = 0; i < inner_dim_size; i++) { for (int64_t j = 0; j < input_index_dim_size; j++) { diff --git a/paddle/fluid/operators/gather_op_npu_test.cc b/paddle/fluid/operators/gather_op_npu_test.cc index f50c4f5528e74..247ce8529c93c 100644 --- a/paddle/fluid/operators/gather_op_npu_test.cc +++ b/paddle/fluid/operators/gather_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/gather_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(gather); USE_OP_DEVICE_KERNEL(gather, NPU); diff --git a/paddle/fluid/operators/gelu_op_npu_test.cc b/paddle/fluid/operators/gelu_op_npu_test.cc index f47250c96817a..bcaf7b11feb99 100644 --- a/paddle/fluid/operators/gelu_op_npu_test.cc +++ b/paddle/fluid/operators/gelu_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(gelu); USE_OP_DEVICE_KERNEL(gelu, NPU); diff --git a/paddle/fluid/operators/grid_sampler_op.cu b/paddle/fluid/operators/grid_sampler_op.cu index 8e9f445f3b116..df70efcc6ff5c 100644 --- a/paddle/fluid/operators/grid_sampler_op.cu +++ b/paddle/fluid/operators/grid_sampler_op.cu @@ -292,7 +292,7 @@ class GridSampleOpCUDAKernel : public framework::OpKernel { auto* output_data = output->mutable_data(ctx.GetPlace()); VLOG(3) << "out dims: " << output->dims()[0] << "; " << output->dims()[1] << "; " << output->dims()[2] << "; " << output->dims()[3]; - math::SetConstant()( + pten::funcs::SetConstant()( dev_ctx, output, static_cast(0)); int count = static_cast(n * out_h * out_w); auto cu_stream = dev_ctx.stream(); @@ -459,7 +459,7 @@ class GridSampleGradOpCUDAKernel : public framework::OpKernel { auto* input_grad = ctx.Output(framework::GradVarName("X")); input_grad->mutable_data(ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), input_grad, static_cast(0)); @@ -467,7 +467,7 @@ class GridSampleGradOpCUDAKernel : public framework::OpKernel { if (ctx.HasOutput(framework::GradVarName("Grid"))) { auto* grid_grad = ctx.Output(framework::GradVarName("Grid")); grid_grad_data = grid_grad->mutable_data(ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), grid_grad, static_cast(0)); } diff --git a/paddle/fluid/operators/grid_sampler_op.h b/paddle/fluid/operators/grid_sampler_op.h index a595e5078b21d..874a8d8c2a2b6 100644 --- a/paddle/fluid/operators/grid_sampler_op.h +++ b/paddle/fluid/operators/grid_sampler_op.h @@ -19,8 +19,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/core/hostdevice.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -520,7 +520,7 @@ class GridSampleOpKernel : public framework::OpKernel { auto* output = ctx.Output("Output"); output->mutable_data({n, c, out_h, out_w}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), output, static_cast(0)); @@ -563,7 +563,7 @@ class GridSampleGradOpKernel : public framework::OpKernel { auto* input_grad = ctx.Output(framework::GradVarName("X")); input_grad->mutable_data({n, c, in_h, in_w}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), input_grad, static_cast(0)); @@ -571,7 +571,7 @@ class GridSampleGradOpKernel : public framework::OpKernel { if (ctx.HasOutput(framework::GradVarName("Grid"))) { grid_grad = ctx.Output(framework::GradVarName("Grid")); grid_grad->mutable_data({n, out_h, out_w, 2}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), grid_grad, static_cast(0)); } diff --git a/paddle/fluid/operators/group_norm_op.cu b/paddle/fluid/operators/group_norm_op.cu index 055fd791af5a3..584be96c659d6 100644 --- a/paddle/fluid/operators/group_norm_op.cu +++ b/paddle/fluid/operators/group_norm_op.cu @@ -153,7 +153,7 @@ class GroupNormKernel y->mutable_data(ctx.GetPlace()); mean->mutable_data(ctx.GetPlace()); var->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); Tensor temp_var; temp_var.mutable_data(var->dims(), ctx.GetPlace()); @@ -321,7 +321,7 @@ class GroupNormGradKernel : x_dims[x_dims.size() - 2]); d_x->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); Tensor temp_var; diff --git a/paddle/fluid/operators/group_norm_op.h b/paddle/fluid/operators/group_norm_op.h index 9cb451235f152..3fc2d413b6cef 100644 --- a/paddle/fluid/operators/group_norm_op.h +++ b/paddle/fluid/operators/group_norm_op.h @@ -22,7 +22,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -239,7 +239,7 @@ class GroupNormGradKernel : public framework::OpKernel { const int group_size = C / groups; d_x->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); auto* x_data = x->data(); diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index a2d61695649dc..20956e3cdbbde 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -321,7 +321,7 @@ class GRUCPUKernel : public framework::OpKernel { to_batch(dev_ctx, *input, batch_gate, true, is_reverse); if (bias) { - math::RowwiseAdd add_bias; + pten::funcs::RowwiseAdd add_bias; add_bias(dev_ctx, *batch_gate, *bias, batch_gate); } diff --git a/paddle/fluid/operators/gru_op.cu.cc b/paddle/fluid/operators/gru_op.cu.cc index ce3c8ac51c76a..0f1db8de5a30b 100644 --- a/paddle/fluid/operators/gru_op.cu.cc +++ b/paddle/fluid/operators/gru_op.cu.cc @@ -70,7 +70,7 @@ class GRUKernel : public framework::OpKernel { to_batch(dev_ctx, *input, batch_gate, true, is_reverse); if (bias) { - math::RowwiseAdd add_bias; + pten::funcs::RowwiseAdd add_bias; add_bias(dev_ctx, *batch_gate, *bias, batch_gate); } diff --git a/paddle/fluid/operators/gru_op.h b/paddle/fluid/operators/gru_op.h index bcca992e2b426..e9d520dd9fc66 100644 --- a/paddle/fluid/operators/gru_op.h +++ b/paddle/fluid/operators/gru_op.h @@ -18,8 +18,8 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/detail/activation_functions.h" #include "paddle/fluid/operators/math/gru_compute.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence2batch.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -69,7 +69,7 @@ class GRUGradKernel : public framework::OpKernel { batch_gate_grad.mutable_data(gate_dims, context.GetPlace()); batch_reset_hidden_prev_grad.mutable_data(hidden_dims, context.GetPlace()); - math::SetConstant zero; + pten::funcs::SetConstant zero; auto& dev_ctx = context.template device_context(); zero(dev_ctx, &batch_hidden_grad, static_cast(0.0)); zero(dev_ctx, &batch_gate_grad, static_cast(0.0)); @@ -157,7 +157,7 @@ class GRUGradKernel : public framework::OpKernel { } if (bias_grad) { bias_grad->mutable_data(context.GetPlace()); - math::ColwiseSum col_sum; + pten::funcs::ColwiseSum col_sum; col_sum(dev_ctx, batch_gate_grad, bias_grad); } if (h0 && h0_grad) { diff --git a/paddle/fluid/operators/gumbel_softmax_op.cu b/paddle/fluid/operators/gumbel_softmax_op.cu index 63577ed1e0f1f..ba6ce141e81c0 100644 --- a/paddle/fluid/operators/gumbel_softmax_op.cu +++ b/paddle/fluid/operators/gumbel_softmax_op.cu @@ -99,7 +99,7 @@ struct OneHotGenerator { Tensor input_tensor; input_tensor.mutable_data(Out->dims(), platform::CUDAPlace()); paddle::framework::TensorCopy(*Out, context.GetPlace(), &input_tensor); - math::set_constant(context, Out, 0.0); + pten::funcs::set_constant(context, Out, 0.0); OneHotCUDAKernel< T, thread_size><<>>( height, size_from_axis / size_out_axis, size_out_axis, diff --git a/paddle/fluid/operators/gumbel_softmax_op.h 
b/paddle/fluid/operators/gumbel_softmax_op.h index f95a4810f4442..3cd211ccc3e47 100644 --- a/paddle/fluid/operators/gumbel_softmax_op.h +++ b/paddle/fluid/operators/gumbel_softmax_op.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/softmax.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -155,7 +155,7 @@ struct OneHotGenerator { #undef CALL_ARG_MINMAX_FUNCTOR } - math::set_constant(context, Out, 0.0); + pten::funcs::set_constant(context, Out, 0.0); for (int i = 0; i < size_to_axis; i++) { for (int j = 0; j < size_out_axis; j++) { *(Out->data() + i * size_from_axis + j + diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.h b/paddle/fluid/operators/hierarchical_sigmoid_op.h index 17734b9c542c8..5734e247f4dfc 100644 --- a/paddle/fluid/operators/hierarchical_sigmoid_op.h +++ b/paddle/fluid/operators/hierarchical_sigmoid_op.h @@ -24,9 +24,9 @@ limitations under the License. */ #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/clip_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/matrix_bit_code.h" #include "paddle/fluid/platform/transform.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -81,10 +81,10 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel { auto pre_out_mat = EigenMatrix::From(*pre_out); // Not all class(leaf) nodes' path lengths equal code_length, thus init as // 0s can avoid out of path's loss. - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, pre_out, static_cast(0.0)); auto& place = *ctx.template device_context().eigen_device(); - math::RowwiseSum row_sum; + pten::funcs::RowwiseSum row_sum; std::unique_ptr> bit_code; if (!is_custom) { @@ -134,7 +134,7 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel { auto* in_grad = ctx.Output(framework::GradVarName("X")); bool is_sparse = ctx.Attr("is_sparse"); auto& dev_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; auto& label = GET_DATA_SAFELY(ctx.Input("Label"), "Input", "Label", "HierarchicalSigmoidGrad"); auto& pre_out = GET_DATA_SAFELY(ctx.Input("PreOut"), "Input", diff --git a/paddle/fluid/operators/histogram_op.cu b/paddle/fluid/operators/histogram_op.cu index a34f4b8a22e57..48a637e6c37b1 100644 --- a/paddle/fluid/operators/histogram_op.cu +++ b/paddle/fluid/operators/histogram_op.cu @@ -82,7 +82,7 @@ class HistogramCUDAKernel : public framework::OpKernel { const int input_numel = input->numel(); int64_t* out_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, static_cast(0)); diff --git a/paddle/fluid/operators/histogram_op.h b/paddle/fluid/operators/histogram_op.h index a6f4448cbcb17..9e280336e492a 100644 --- a/paddle/fluid/operators/histogram_op.h +++ b/paddle/fluid/operators/histogram_op.h @@ -18,7 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -39,7 +39,7 @@ class HistogramKernel : public framework::OpKernel { auto input_numel = input->numel(); int64_t* out_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, static_cast(0)); diff --git a/paddle/fluid/operators/im2sequence_op.h b/paddle/fluid/operators/im2sequence_op.h index 39ff7ea40aaa8..6eac1cc4e4c8e 100644 --- a/paddle/fluid/operators/im2sequence_op.h +++ b/paddle/fluid/operators/im2sequence_op.h @@ -20,7 +20,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/math/im2col.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/increment_op_npu_test.cc b/paddle/fluid/operators/increment_op_npu_test.cc index ca9420c04a293..47e2f2c3cfc03 100644 --- a/paddle/fluid/operators/increment_op_npu_test.cc +++ b/paddle/fluid/operators/increment_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(increment); USE_OP_DEVICE_KERNEL(increment, NPU); diff --git a/paddle/fluid/operators/index_sample_op.cu b/paddle/fluid/operators/index_sample_op.cu index 4c9dec1400076..e145c555dc552 100644 --- a/paddle/fluid/operators/index_sample_op.cu +++ b/paddle/fluid/operators/index_sample_op.cu @@ -14,9 +14,9 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/index_sample_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define PREDEFINED_BLOCK_SIZE_X 512 #define PREDEFINED_BLOCK_SIZE 1024 @@ -177,7 +177,7 @@ class IndexSampleGradKernel (batch_size + block_dim.y - 1) / block_dim.y); LimitGridDim(ctx, &grid_dim); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); set_zero(dev_ctx, input_grad, static_cast(0)); diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index be76a66ef7c96..b157f775d50eb 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -16,7 +16,7 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -159,7 +159,7 @@ void IndexSelectGradInner(const framework::ExecutionContext& context, auto output_dim = x_grad->dims(); auto& dev_ctx = context.template device_context(); - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; 
set_constant(dev_ctx, x_grad, static_cast(0.0)); auto slice_size = 1; diff --git a/paddle/fluid/operators/inplace_abn_op.h b/paddle/fluid/operators/inplace_abn_op.h index 9c3727ab903d9..142096eb34cc1 100644 --- a/paddle/fluid/operators/inplace_abn_op.h +++ b/paddle/fluid/operators/inplace_abn_op.h @@ -16,7 +16,7 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/activation_op.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/instance_norm_op.cc b/paddle/fluid/operators/instance_norm_op.cc index cfdaacf8cb6ee..8c650c6437632 100644 --- a/paddle/fluid/operators/instance_norm_op.cc +++ b/paddle/fluid/operators/instance_norm_op.cc @@ -18,7 +18,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -208,7 +208,7 @@ class InstanceNormKernel Eigen::IndexList> rdims; #endif - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; saved_mean->mutable_data(ctx.GetPlace()); saved_variance->mutable_data(ctx.GetPlace()); @@ -356,7 +356,7 @@ class InstanceNormGradKernel NxC_shape.set(0, NxC); #endif - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; Tensor scale_data; if (!scale) { @@ -492,7 +492,7 @@ class InstanceNormDoubleGradKernel auto *ddY = ctx.Output("DDY"); auto &dev_ctx = ctx.template device_context(); - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; const auto &x_dims = X->dims(); int N, C, H, W, D; diff --git a/paddle/fluid/operators/instance_norm_op.cu b/paddle/fluid/operators/instance_norm_op.cu index e0401366693b1..a6c935074feb0 100644 --- a/paddle/fluid/operators/instance_norm_op.cu +++ b/paddle/fluid/operators/instance_norm_op.cu @@ -25,8 +25,8 @@ namespace cub = hipcub; #endif #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/instance_norm_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -169,7 +169,7 @@ class InstanceNormKernel const int max_blocks = std::max(max_threads / block, 1); const int grid = std::min((NxC + block - 1) / block, max_blocks); - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; if (scale) { repeat_param<<>>( scale->data(), scale_tmp.data(), N, C); @@ -185,7 +185,7 @@ class InstanceNormKernel auto handle = dev_ctx.cudnn_handle(); - math::SetConstant> + pten::funcs::SetConstant> functor; auto *saved_mean = ctx.Output("SavedMean"); @@ -349,7 +349,7 @@ class InstanceNormGradKernel } auto &dev_ctx = ctx.template device_context(); - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; const int n = x->numel(); const int block = 512; @@ -379,7 +379,8 @@ class InstanceNormGradKernel if ((H * W * D) == 1) { framework::TensorCopy(*d_y, ctx.GetPlace(), d_x); - math::SetConstant> + pten::funcs::SetConstant> functor; functor(dev_ctx, d_scale, static_cast>(0)); functor(dev_ctx, d_bias, static_cast>(0)); @@ -732,7 +733,7 @@ class InstanceNormDoubleGradKernel const T *variance_data = Saved_variance->data(); auto &dev_ctx = ctx.template device_context(); 
- math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto &x_dims = X->dims(); int N, C, H, W, D; diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index 3c857eb326ace..eaf8a2f7d938d 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -1159,7 +1159,7 @@ static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_w == out_w) { @@ -1241,7 +1241,7 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_h == out_h && in_w == out_w) { @@ -1348,7 +1348,7 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, } auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_d == out_d && in_h == out_h && in_w == out_w) { diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h index 0c0dde6bd4536..46353cfb2f2e6 100644 --- a/paddle/fluid/operators/interpolate_op.h +++ b/paddle/fluid/operators/interpolate_op.h @@ -14,8 +14,8 @@ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/core/hostdevice.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -1057,7 +1057,7 @@ static void Interpolate1DCPUBwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_w == out_w) { @@ -1126,7 +1126,7 @@ static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_h == out_h && in_w == out_w) { @@ -1213,7 +1213,7 @@ static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, } input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_d == out_d && in_h == out_h && in_w == out_w) { diff --git a/paddle/fluid/operators/interpolate_v2_op.cu b/paddle/fluid/operators/interpolate_v2_op.cu index 3901a2515450d..7850be047b774 100644 --- a/paddle/fluid/operators/interpolate_v2_op.cu +++ b/paddle/fluid/operators/interpolate_v2_op.cu @@ -1658,7 +1658,7 @@ static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& 
device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_w == out_w) { @@ -1780,7 +1780,7 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_h == out_h && in_w == out_w) { @@ -1965,7 +1965,7 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, } auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_d == out_d && in_h == out_h && in_w == out_w) { diff --git a/paddle/fluid/operators/interpolate_v2_op.h b/paddle/fluid/operators/interpolate_v2_op.h index 4d6189b57bf1c..400c94f48a541 100644 --- a/paddle/fluid/operators/interpolate_v2_op.h +++ b/paddle/fluid/operators/interpolate_v2_op.h @@ -14,8 +14,8 @@ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/core/hostdevice.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -1276,7 +1276,7 @@ static void Interpolate1DCPUBwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_w == out_w) { @@ -1383,7 +1383,7 @@ static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_h == out_h && in_w == out_w) { @@ -1527,7 +1527,7 @@ static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, } input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_d == out_d && in_h == out_h && in_w == out_w) { diff --git a/paddle/fluid/operators/layer_norm_op.h b/paddle/fluid/operators/layer_norm_op.h index ad7c0cc218b20..b7916f44d3c33 100644 --- a/paddle/fluid/operators/layer_norm_op.h +++ b/paddle/fluid/operators/layer_norm_op.h @@ -25,7 +25,7 @@ limitations under the License. 
*/ !defined(__OSX__) #include "paddle/fluid/operators/jit/kernels.h" #endif -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace platform { @@ -57,7 +57,7 @@ class RowwiseMean2D { : left_(left), right_(right) { framework::DDim ones_dim({right_}); divisor_.mutable_data(ones_dim, dev_ctx.GetPlace()); - math::set_constant(dev_ctx, &divisor_, 1.0 / right); + pten::funcs::set_constant(dev_ctx, &divisor_, 1.0 / right); } void operator()(const platform::CUDADeviceContext& context, const framework::Tensor& input, framework::Tensor* out) { @@ -84,7 +84,7 @@ class RowwiseMean2D { } private: - math::RowwiseMean row_mean_; + pten::funcs::RowwiseMean row_mean_; }; template @@ -103,7 +103,7 @@ class ColwiseSum2D { : left_(left), right_(right) { framework::DDim ones_dim({left_}); divisor_.mutable_data(ones_dim, dev_ctx.GetPlace()); - math::set_constant(dev_ctx, &divisor_, 1.0); + pten::funcs::set_constant(dev_ctx, &divisor_, 1.0); } void operator()(const platform::CUDADeviceContext& context, @@ -131,7 +131,7 @@ class ColwiseSum2D { } private: - math::ColwiseSum col_wise_; + pten::funcs::ColwiseSum col_wise_; }; template diff --git a/paddle/fluid/operators/layout_utils.h b/paddle/fluid/operators/layout_utils.h index 52fa7fd1079a7..57c95afc102c6 100644 --- a/paddle/fluid/operators/layout_utils.h +++ b/paddle/fluid/operators/layout_utils.h @@ -20,7 +20,7 @@ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -112,18 +112,18 @@ inline void TransToChannelFirst(const framework::ExecutionContext& context, if (dim == 3) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 4, 1, 2, 3}; - math::Transpose trans5; + pten::funcs::Transpose trans5; trans5(dev_ctx, *input, transformed_input, axis); } else if (dim == 2) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 3, 1, 2}; - math::Transpose trans4; + pten::funcs::Transpose trans4; trans4(dev_ctx, *input, transformed_input, axis); } else if (dim == 1) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 2, 1}; - math::Transpose trans3; + pten::funcs::Transpose trans3; trans3(dev_ctx, *input, transformed_input, axis); } } @@ -135,18 +135,18 @@ inline void TransToChannelLast(const framework::ExecutionContext& context, if (dim == 3) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 2, 3, 4, 1}; - math::Transpose trans5; + pten::funcs::Transpose trans5; trans5(dev_ctx, *input, transformed_input, axis); } else if (dim == 2) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 2, 3, 1}; - math::Transpose trans4; + pten::funcs::Transpose trans4; trans4(dev_ctx, *input, transformed_input, axis); } else if (dim == 1) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 2, 1}; - math::Transpose trans3; + pten::funcs::Transpose trans3; trans3(dev_ctx, *input, transformed_input, axis); } } diff --git a/paddle/fluid/operators/linear_chain_crf_op.h b/paddle/fluid/operators/linear_chain_crf_op.h index eacc5f467d229..c9a82dec724f4 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.h +++ b/paddle/fluid/operators/linear_chain_crf_op.h @@ -15,7 +15,7 @@ limitations under the License. 
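// Sketch: why RowwiseMean2D in layer_norm_op.h above seeds divisor_ with
// 1.0 / right. The row-wise mean of a [left x right] matrix then reduces to a
// single matrix-vector product against that constant vector (the ColwiseSum2D
// counterpart does the same with a divisor of 1.0). Standalone CPU
// illustration of the identity; RowwiseMean2DSketch is a hypothetical name,
// plain C++ rather than the Paddle API.
#include <cstddef>
#include <vector>

std::vector<float> RowwiseMean2DSketch(const std::vector<float>& in,
                                       std::size_t left, std::size_t right) {
  std::vector<float> divisor(right, 1.0f / static_cast<float>(right));
  std::vector<float> out(left, 0.0f);
  for (std::size_t i = 0; i < left; ++i) {
    for (std::size_t j = 0; j < right; ++j) {
      out[i] += in[i * right + j] * divisor[j];  // out = in * divisor (a GEMV)
    }
  }
  return out;
}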
*/ #pragma once #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -102,8 +102,8 @@ class LinearChainCRFOpKernel : public framework::OpKernel { label_tmp.Resize({batch_size, 1}); alpha_tmp.Resize({batch_size, tag_num}); emission_exps_tmp.Resize({batch_size, tag_num}); - math::set_constant(ctx.device_context(), emission_exps, 0.0); - math::set_constant(ctx.device_context(), alpha, 0.0); + pten::funcs::set_constant(ctx.device_context(), emission_exps, 0.0); + pten::funcs::set_constant(ctx.device_context(), alpha, 0.0); } else { in_lod = ctx.Input("Label")->lod(); PADDLE_ENFORCE_NE(in_lod.size(), 0, @@ -274,7 +274,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { // data reader operator, it can have no gradients. if (transition_grad) { transition_grad->mutable_data(platform::CPUPlace()); - math::set_constant(ctx.device_context(), transition_grad, 0.); + pten::funcs::set_constant(ctx.device_context(), transition_grad, 0.); } // Now, all the inputs and outputs should be on the CPU memory. auto emission_dims = emission_exps->dims(); diff --git a/paddle/fluid/operators/linspace_op.h b/paddle/fluid/operators/linspace_op.h index d8e0fefe17586..7e384f4b64bc3 100644 --- a/paddle/fluid/operators/linspace_op.h +++ b/paddle/fluid/operators/linspace_op.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc index b7c28a0908dd4..bee8b5396af5f 100644 --- a/paddle/fluid/operators/lrn_op.cc +++ b/paddle/fluid/operators/lrn_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif @@ -36,7 +36,7 @@ struct LRNFunctor { T k, T alpha, T beta, const DataLayout data_layout) { auto place = ctx.GetPlace(); auto blas = math::GetBlas(ctx); - math::Transpose transpose; + pten::funcs::Transpose transpose; auto& dev_ctx = ctx.template device_context(); Tensor in_transpose, mid_transpose, out_transpose; // if channel_last, transpose to channel_first diff --git a/paddle/fluid/operators/lrn_op.h b/paddle/fluid/operators/lrn_op.h index bdf3ad81ddbba..a619d6c72376c 100644 --- a/paddle/fluid/operators/lrn_op.h +++ b/paddle/fluid/operators/lrn_op.h @@ -18,7 +18,7 @@ limitations under the License. 
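// Sketch: layout_utils.h above drives the relocated pten::funcs::Transpose
// with fixed axis vectors, e.g. {0, 3, 1, 2} turns NHWC into NCHW and
// {0, 2, 3, 1} turns it back. Shape-only illustration of what such an axis
// vector does; PermuteDimsSketch is a hypothetical helper, not part of the
// patch.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int64_t> PermuteDimsSketch(const std::vector<int64_t>& dims,
                                       const std::vector<int>& axis) {
  std::vector<int64_t> out(axis.size());
  for (std::size_t i = 0; i < axis.size(); ++i) {
    out[i] = dims[axis[i]];  // e.g. {N,H,W,C} with {0,3,1,2} -> {N,C,H,W}
  }
  return out;
}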
*/ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index c6f43b949a736..df94952a9a693 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -76,7 +76,7 @@ class LSTMKernel : public framework::OpKernel { Tensor b = *bias; b.Resize({bias->numel(), 1}); Tensor gate_bias = b.Slice(0, 4 * frame_size); - math::RowwiseAdd add_bias; + pten::funcs::RowwiseAdd add_bias; add_bias(device_ctx, *batch_gate, gate_bias, batch_gate); } @@ -210,7 +210,7 @@ class LSTMGradKernel : public framework::OpKernel { auto* c0_g = ctx.Output(framework::GradVarName("C0")); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; if (weight_g) { weight_g->mutable_data(ctx.GetPlace()); zero(device_ctx, weight_g, static_cast(0.0)); @@ -380,7 +380,7 @@ class LSTMGradKernel : public framework::OpKernel { Tensor b_g = *bias_g; b_g.Resize({bias_g->numel(), 1}); Tensor gate_bias_g = b_g.Slice(0, 4 * frame_size); - math::ColwiseSum col_sum; + pten::funcs::ColwiseSum col_sum; col_sum(device_ctx, batch_gate_g, &gate_bias_g); } diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index 5a6ac42f45785..c63184f76e702 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -133,7 +133,7 @@ class LSTMPKernel : public framework::OpKernel { Tensor b = *bias; b.Resize({bias->numel(), 1}); Tensor gate_bias = b.Slice(0, 4 * frame_size); - math::RowwiseAdd add_bias; + pten::funcs::RowwiseAdd add_bias; add_bias(device_ctx, *batch_gate, gate_bias, batch_gate); } @@ -304,7 +304,7 @@ class LSTMPGradKernel : public framework::OpKernel { auto* c0_g = ctx.Output(framework::GradVarName("C0")); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; if (weight_g) { weight_g->mutable_data(ctx.GetPlace()); zero(device_ctx, weight_g, static_cast(0.0)); @@ -514,7 +514,7 @@ class LSTMPGradKernel : public framework::OpKernel { Tensor b_g = *bias_g; b_g.Resize({bias_g->numel(), 1}); Tensor gate_bias_g = b_g.Slice(0, 4 * frame_size); - math::ColwiseSum col_sum; + pten::funcs::ColwiseSum col_sum; col_sum(device_ctx, batch_gate_g, &gate_bias_g); } diff --git a/paddle/fluid/operators/lstsq_op.h b/paddle/fluid/operators/lstsq_op.h index be411232706a5..dd0cff5cc5f44 100644 --- a/paddle/fluid/operators/lstsq_op.h +++ b/paddle/fluid/operators/lstsq_op.h @@ -21,12 +21,12 @@ #include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/math/eigen_values_vectors.h" #include "paddle/fluid/operators/math/lapack_function.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/matrix_solve.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/operators/triangular_solve_op.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define EPSILON 1e-6 diff --git a/paddle/fluid/operators/lu_op.h b/paddle/fluid/operators/lu_op.h index c3b3552ba1329..b3d79122bcd83 100644 --- a/paddle/fluid/operators/lu_op.h +++ b/paddle/fluid/operators/lu_op.h @@ -455,7 +455,7 @@ void 
Unpack_Pivot(const DeviceContext& dev_ctx, const framework::Tensor& Pivot, auto Pdim = framework::make_ddim(Pdimvec); P->Resize(Pdim); auto pdata = P->mutable_data(dev_ctx.GetPlace()); - math::SetConstant setter; + pten::funcs::SetConstant setter; setter(dev_ctx, P, static_cast(0)); auto batchsize = product(framework::slice_ddim(dims, 0, prank - 1)); @@ -543,7 +543,7 @@ class LUGradKernel : public framework::OpKernel { Tensor_Add(dev_ctx, phi_L, phi_U, &phi); psi.Resize(xdims); psi.mutable_data(ctx.GetPlace()); - math::SetConstant setter; + pten::funcs::SetConstant setter; setter(dev_ctx, &psi, static_cast(0)); std::vector axes = {xrank - 2, xrank - 1}; diff --git a/paddle/fluid/operators/lu_unpack_op.h b/paddle/fluid/operators/lu_unpack_op.h index 115ab116fda1a..c245c7eb65551 100644 --- a/paddle/fluid/operators/lu_unpack_op.h +++ b/paddle/fluid/operators/lu_unpack_op.h @@ -110,7 +110,7 @@ class LU_UnpackGradKernel : public framework::OpKernel { std::vector slice_ends(2, 0); auto valuedims = vectorize(xdims); - math::SetConstant setter; + pten::funcs::SetConstant setter; setter(dev_ctx, dx, static_cast(0)); if (m <= n) { slice_starts[0] = 0; diff --git a/paddle/fluid/operators/margin_cross_entropy_op.cu b/paddle/fluid/operators/margin_cross_entropy_op.cu index 51776f2166dd5..a59909644aa25 100644 --- a/paddle/fluid/operators/margin_cross_entropy_op.cu +++ b/paddle/fluid/operators/margin_cross_entropy_op.cu @@ -22,11 +22,11 @@ namespace cub = hipcub; #include #include "paddle/fluid/operators/amp/fp16_type_traits.h" #include "paddle/fluid/operators/margin_cross_entropy_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/softmax_impl.h" #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h" #include "paddle/fluid/operators/reduce_ops/reduce_op.h" #include "paddle/fluid/string/string_helper.h" +#include "paddle/pten/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/fluid/platform/collective_helper.h" @@ -341,8 +341,8 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { // step 6, prob = exp((logit - logit_max) - log(sum(exp(logit - // logit_max)))) // loss = -((logit_i - logit_max) - log(sum(exp(logit - logit_max)))) - math::SetConstant()(dev_ctx, loss, - static_cast(0.0)); + pten::funcs::SetConstant()( + dev_ctx, loss, static_cast(0.0)); if (label_type == framework::proto::VarType::INT32) { typedef int32_t LabelT; HardLabelSoftmaxWithCrossEntropyKernel< diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index 65bf595bcebb8..a97e2ecfce701 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -61,7 +61,7 @@ math_library(gru_compute DEPS activation_functions math_function) math_library(lstm_compute DEPS activation_functions) cc_library(blas SRCS blas.cc DEPS cblas framework_proto device_context) -math_library(math_function DEPS blas dense_tensor tensor) +# math_library(math_function DEPS blas dense_tensor tensor) math_library(maxouting) math_library(pooling) @@ -95,7 +95,6 @@ math_library(matrix_inverse) math_library(segment_pooling) math_library(matrix_solve) -cc_test(math_function_test SRCS math_function_test.cc DEPS math_function) cc_test(selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selected_rows_functor) cc_test(im2col_test SRCS im2col_test.cc DEPS im2col) cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col) @@ -103,11 +102,9 @@ 
cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_paddin cc_test(sequence_pooling_test SRCS sequence_pooling_test.cc DEPS sequence_pooling) cc_test(beam_search_test SRCS beam_search_test.cc DEPS beam_search) if(WITH_GPU) - nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function) nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc DEPS selected_rows_functor math_function) endif() if(WITH_ROCM) - hip_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor) hip_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc DEPS selected_rows_functor math_function) endif() cc_test(concat_test SRCS concat_test.cc DEPS concat_and_split) diff --git a/paddle/fluid/operators/math/blas_impl.cu.h b/paddle/fluid/operators/math/blas_impl.cu.h index 7ffd2a7ab2d84..f9a4e963c0c47 100644 --- a/paddle/fluid/operators/math/blas_impl.cu.h +++ b/paddle/fluid/operators/math/blas_impl.cu.h @@ -14,8 +14,8 @@ #pragma once -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/dynload/cublas.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/pten/backends/gpu/gpu_context.h" diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index 80b7acc610330..8e0075c42eb2c 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -22,9 +22,9 @@ #include #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/complex.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/blas_impl.hip.h b/paddle/fluid/operators/math/blas_impl.hip.h index bf7d66f485327..980caa9cfe68c 100644 --- a/paddle/fluid/operators/math/blas_impl.hip.h +++ b/paddle/fluid/operators/math/blas_impl.hip.h @@ -14,10 +14,10 @@ #pragma once -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/dynload/rocblas.h" #include "paddle/pten/backends/gpu/gpu_context.h" +#include "paddle/pten/kernels/funcs/math_function.h" DECLARE_bool(enable_cublas_tensor_op_math); diff --git a/paddle/fluid/operators/math/depthwise_conv.cu b/paddle/fluid/operators/math/depthwise_conv.cu index 6ff2ddaa338df..117e6c4708064 100644 --- a/paddle/fluid/operators/math/depthwise_conv.cu +++ b/paddle/fluid/operators/math/depthwise_conv.cu @@ -22,9 +22,9 @@ limitations under the License. 
*/ namespace cub = hipcub; #endif #include "paddle/fluid/operators/math/depthwise_conv.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -910,7 +910,7 @@ class DepthwiseConvFunctor(context.GetPlace()); std::vector perm_axis({2, 3, 0, 1}); - math::TransposeNormal trans; + pten::funcs::TransposeNormal trans; trans(context, filter, &filter_hwc, perm_axis); filter_data = filter_hwc.data(); } @@ -1053,7 +1053,7 @@ class DepthwiseConvInputGradFunctor(context.GetPlace()); std::vector perm_axis({2, 3, 0, 1}); - math::TransposeNormal trans; + pten::funcs::TransposeNormal trans; trans(context, filter, &filter_hwc, perm_axis); filter_data = filter_hwc.data(); } @@ -1215,7 +1215,7 @@ class DepthwiseConvFilterGradFunctordims()[0], filter_grad->dims()[1]}); \ filter_grad_hwc.Resize(filter_grad_hwc_dims); \ filter_grad_hwc.mutable_data(context.GetPlace()); \ - math::SetConstant set_zero; \ + pten::funcs::SetConstant set_zero; \ set_zero(context, &filter_grad_hwc, static_cast(0)); \ filter_grad_data = filter_grad_hwc.data(); \ } else { \ @@ -1240,7 +1240,7 @@ class DepthwiseConvFilterGradFunctor perm_axis({2, 3, 0, 1}); \ - math::TransposeNormal trans; \ + pten::funcs::TransposeNormal trans; \ trans(context, filter_grad_hwc, filter_grad, perm_axis); \ } \ } \ diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc deleted file mode 100644 index 2672d02db008e..0000000000000 --- a/paddle/fluid/operators/math/math_function.cc +++ /dev/null @@ -1,306 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
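// Sketch: the math_function.cc deleted below carried the generic CPU
// TransposeNormal, kept for tensors of rank >= 7 where the Eigen-backed
// Transpose (instantiated only for ranks 1..6) cannot apply. It decodes each
// output offset back to an input offset through the two stride arrays, as
// illustrated standalone here; TransposeNormalSketch is a hypothetical name.
#include <cstdint>
#include <vector>

void TransposeNormalSketch(const float* in, float* out,
                           const std::vector<int64_t>& in_stride,
                           const std::vector<int64_t>& out_stride,
                           const std::vector<int>& axis, int64_t numel) {
  const int rank = static_cast<int>(axis.size());
  for (int64_t out_idx = 0; out_idx < numel; ++out_idx) {
    int64_t in_idx = 0;
    int64_t tmp = out_idx;
    for (int i = 0; i < rank; ++i) {
      const int64_t coord = tmp / out_stride[i];  // coordinate along output dim i
      tmp -= coord * out_stride[i];
      in_idx += coord * in_stride[axis[i]];       // same coordinate, source layout
    }
    out[out_idx] = in[in_idx];
  }
}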
*/ - -#include "paddle/fluid/operators/math/math_function.h" - -#ifdef PADDLE_WITH_MKLML -#include "paddle/fluid/platform/dynload/mklml.h" -#endif - -#ifdef PADDLE_USE_OPENBLAS -#include -#endif - -#include -#include -#include -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/operators/math/math_function_impl.h" -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/pten/backends/cpu/cpu_context.h" -#include "paddle/pten/kernels/funcs/eigen/common.h" -#include "unsupported/Eigen/CXX11/Tensor" - -namespace paddle { -namespace operators { -namespace math { - -using float16 = paddle::platform::float16; - -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; - -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; - -#ifdef PADDLE_WITH_XPU -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; -#endif - -#define DEFINE_CPU_TRANS(RANK) \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose, RANK>; \ - template struct Transpose, RANK>; - -DEFINE_CPU_TRANS(1); -DEFINE_CPU_TRANS(2); -DEFINE_CPU_TRANS(3); -DEFINE_CPU_TRANS(4); -DEFINE_CPU_TRANS(5); -DEFINE_CPU_TRANS(6); - -template -struct TransposeNormal { - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& in, framework::Tensor* out, - const std::vector& axis) { - const int rank = axis.size(); - auto in_stride = framework::stride(in.dims()); - auto out_stride = framework::stride(out->dims()); - const T* in_ptr = in.data(); - T* out_ptr = out->data(); - - auto transpose_helper = [&](int64_t beg, int64_t end) { - for (int64_t out_idx = beg; out_idx < end; ++out_idx) { - int64_t in_idx = 0; - int64_t tmp_idx = out_idx; - // calculate the input index - for (int i = 0; i < rank; ++i) { - const int64_t coordinate = tmp_idx / out_stride[i]; - tmp_idx -= coordinate * out_stride[i]; - in_idx += coordinate * in_stride[axis[i]]; - } - out_ptr[out_idx] = in_ptr[in_idx]; - } - }; - transpose_helper(0, out->numel()); - } -}; - -// define transpose normal -#define DEFINE_CPU_TRANS_NORMAL(TYPE) \ - template struct TransposeNormal - -DEFINE_CPU_TRANS_NORMAL(platform::float16); -DEFINE_CPU_TRANS_NORMAL(platform::bfloat16); -DEFINE_CPU_TRANS_NORMAL(float); -DEFINE_CPU_TRANS_NORMAL(double); -DEFINE_CPU_TRANS_NORMAL(int); -DEFINE_CPU_TRANS_NORMAL(int64_t); -DEFINE_CPU_TRANS_NORMAL(bool); -DEFINE_CPU_TRANS_NORMAL(int16_t); -DEFINE_CPU_TRANS_NORMAL(uint8_t); 
-DEFINE_CPU_TRANS_NORMAL(int8_t); -DEFINE_CPU_TRANS_NORMAL(platform::complex); -DEFINE_CPU_TRANS_NORMAL(platform::complex); - -struct TensorSetConstantCPU { - TensorSetConstantCPU(framework::Tensor* tensor, float value) - : tensor_(tensor), value_(value) {} - template - void apply() const { - auto cpu = platform::CPUPlace(); - auto* begin = tensor_->mutable_data(cpu); - std::fill(begin, begin + tensor_->numel(), static_cast(value_)); - } - framework::Tensor* tensor_; - float value_; -}; - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("NPUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - PADDLE_THROW( - platform::errors::Unimplemented("NPUPinnedPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("IPUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - framework::VisitDataType(tensor->type(), TensorSetConstantCPU(tensor, value)); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("MLUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - framework::VisitDataType(tensor->type(), TensorSetConstantCPU(tensor, value)); -} - -struct TensorSetConstantWithPlace : public boost::static_visitor { - TensorSetConstantWithPlace(const platform::DeviceContext& context, - framework::Tensor* tensor, float value) - : context_(context), tensor_(tensor), value_(value) {} - - template - void operator()(Place place) const { - set_constant_with_place(context_, tensor_, value_); - } - - const platform::DeviceContext& context_; - framework::Tensor* tensor_; - float value_; -}; - -void set_constant(const platform::DeviceContext& context, - framework::Tensor* tensor, float value) { - TensorSetConstantWithPlace func(context, tensor, value); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - // tensor->place().apply_visitor(func); - paddle::platform::VisitPlace(tensor->place(), func); -#else - func(platform::CPUPlace()); -#endif -} - -template -struct RowwiseAdd { - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& input, - const framework::Tensor& vector, framework::Tensor* output) { - auto in_dims = input.dims(); - auto out_dims = output->dims(); - auto size = input.numel() / in_dims[0]; - PADDLE_ENFORCE_EQ( - vector.numel(), size, - platform::errors::InvalidArgument( - "The input vector size" - " should be equal to the size of each row of input tensor." 
- " Expected vector size=%d, but received %d", - size, vector.numel())); - const char* in_dims_cstr = in_dims.to_str().c_str(); - const char* out_dims_cstr = out_dims.to_str().c_str(); - PADDLE_ENFORCE_EQ(out_dims, in_dims, - platform::errors::InvalidArgument( - "The output tensor shape should be same as the input" - " tensor shape. Expected output tensor shape: %s," - " but received %s", - in_dims_cstr, out_dims_cstr)); - - auto in = framework::EigenMatrix::From(input); - auto vec = framework::EigenVector::Flatten(vector); - auto out = framework::EigenMatrix::From(*output); - - for (int64_t i = 0; i < in_dims[0]; ++i) { - out.chip(i, 0) = in.chip(i, 0) + vec; - } - } -}; - -template struct RowwiseAdd; -template struct RowwiseAdd; - -template struct ColwiseSum; -template struct ColwiseSum; -template struct ColwiseSum; -template struct ColwiseSum; - -template struct RowwiseSum; -template struct RowwiseSum; - -template struct RowwiseMean; -template struct RowwiseMean; - -template -struct ElementwiseAddTo { - void operator()(platform::CPUDeviceContext* ctx, const framework::Tensor& src, - framework::Tensor* dst) { - auto in = framework::EigenVector::Flatten(src); - auto out = framework::EigenVector::Flatten(*dst); - auto& place = *(ctx->eigen_device()); - out.device(place) = out + in; - } -}; - -template struct ElementwiseAddTo; - -} // namespace math -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/math/math_function.cu b/paddle/fluid/operators/math/math_function.cu deleted file mode 100644 index f0ef692b99f57..0000000000000 --- a/paddle/fluid/operators/math/math_function.cu +++ /dev/null @@ -1,322 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#include -#include -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" -#include "paddle/fluid/operators/math/math_function_impl.h" -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/pten/backends/gpu/gpu_context.h" -#include "paddle/pten/kernels/funcs/eigen/common.h" - -namespace paddle { -namespace operators { -namespace math { - -using float16 = paddle::platform::float16; -using bfloat16 = paddle::platform::bfloat16; - -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; - -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; - -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; - -#define DEFINE_GPU_TRANS(RANK) \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose, RANK>; \ - template struct Transpose, RANK>; - -DEFINE_GPU_TRANS(1); -DEFINE_GPU_TRANS(2); -DEFINE_GPU_TRANS(3); -DEFINE_GPU_TRANS(4); -DEFINE_GPU_TRANS(5); -DEFINE_GPU_TRANS(6); - -#define REINTERPRET(T, DST_PTR, SRC_PTR) \ - T* DST_PTR = reinterpret_cast(SRC_PTR) - -template -__global__ void TransposeNormalKernel(const T* in_ptr, T* out_ptr, - int64_t element, - const int64_t* in_stride_ptr, - const int64_t* out_stride_ptr, - const int64_t* axis_ptr, int rank) { - CUDA_KERNEL_LOOP(out_idx, element) { - int64_t in_idx = 0; - int64_t tmp_idx = out_idx; - for (int i = 0; i < rank; ++i) { - const int64_t coordinate = tmp_idx / out_stride_ptr[i]; - tmp_idx -= coordinate * out_stride_ptr[i]; - in_idx += coordinate * in_stride_ptr[axis_ptr[i]]; - } - out_ptr[out_idx] = in_ptr[in_idx]; - } -} - -template -struct TransposeNormal { - void operator()(const platform::CUDADeviceContext& context, - const framework::Tensor& in, framework::Tensor* out, - const std::vector& axis) { - const int rank = axis.size(); - auto in_stride = framework::stride(in.dims()); - auto out_stride = framework::stride(out->dims()); - auto* in_ptr = in.data(); - auto* out_ptr = out->data(); - - // copy in_stride, out_stride, axis to gpu device - const platform::CUDAPlace& cuda_place = context.GetPlace(); - platform::CPUPlace cpu_place = platform::CPUPlace(); - size_t size = 3 * rank * sizeof(int64_t); - auto cpu_buf_holder = memory::Alloc(cpu_place, size); - auto cuda_buf_holder = memory::Alloc(cuda_place, size); - REINTERPRET(int64_t, cpu_buf, cpu_buf_holder->ptr()); - 
REINTERPRET(int64_t, cuda_buf, cuda_buf_holder->ptr());
-    for (int i = 0; i < rank; ++i) {
-      cpu_buf[i] = in_stride[i];
-      cpu_buf[rank + i] = out_stride[i];
-      cpu_buf[2 * rank + i] = axis[i];
-    }
-    memory::Copy(cuda_place, cuda_buf, cpu_place, cpu_buf, size,
-                 context.stream());
-    REINTERPRET(const int64_t, in_stride_ptr, cuda_buf);
-    REINTERPRET(const int64_t, out_stride_ptr, cuda_buf + rank);
-    REINTERPRET(const int64_t, axis_ptr, cuda_buf + 2 * rank);
-
-    const int MAX_BLOCK_DIM = context.GetMaxThreadsPerBlock();
-    const int MAX_GRID_DIM =
-        context.GetMaxPhysicalThreadCount() / MAX_BLOCK_DIM;
-    int64_t elements = in.numel();
-    int block_size = (elements >= MAX_BLOCK_DIM)
-                         ? MAX_BLOCK_DIM
-                         : (1 << static_cast<int>(std::log2(elements)));
-    int grid_size = elements / block_size;
-    grid_size = (grid_size >= MAX_GRID_DIM) ? MAX_GRID_DIM : grid_size;
-    TransposeNormalKernel<T><<<grid_size, block_size, 0, context.stream()>>>(
-        in_ptr, out_ptr, elements, in_stride_ptr, out_stride_ptr, axis_ptr,
-        rank);
-  }
-};
-
-// define transpose normal
-#define DEFINE_GPU_TRANS_NORMAL(TYPE) \
-  template struct TransposeNormal<platform::CUDADeviceContext, TYPE>
-
-DEFINE_GPU_TRANS_NORMAL(float16);
-DEFINE_GPU_TRANS_NORMAL(bfloat16);
-DEFINE_GPU_TRANS_NORMAL(float);
-DEFINE_GPU_TRANS_NORMAL(double);
-DEFINE_GPU_TRANS_NORMAL(int);
-DEFINE_GPU_TRANS_NORMAL(int64_t);
-DEFINE_GPU_TRANS_NORMAL(bool);
-DEFINE_GPU_TRANS_NORMAL(int16_t);
-DEFINE_GPU_TRANS_NORMAL(uint8_t);
-DEFINE_GPU_TRANS_NORMAL(int8_t);
-DEFINE_GPU_TRANS_NORMAL(paddle::platform::complex<float>);
-DEFINE_GPU_TRANS_NORMAL(paddle::platform::complex<double>);
-
-struct TensorSetConstantGPU {
-  TensorSetConstantGPU(const platform::DeviceContext& context,
-                       framework::Tensor* tensor, float value)
-      : context_(context), tensor_(tensor), value_(value) {}
-
-  template <typename T>
-  void apply() const {
-    SetConstant<platform::CUDADeviceContext, T> functor;
-    functor(reinterpret_cast<const platform::CUDADeviceContext&>(context_),
-            tensor_, static_cast<T>(value_));
-  }
-
-  const platform::DeviceContext& context_;
-  framework::Tensor* tensor_;
-  float value_;
-};
-
-template <>
-void set_constant_with_place<platform::CUDAPlace>(
-    const platform::DeviceContext& context, framework::Tensor* tensor,
-    float value) {
-  framework::VisitDataType(tensor->type(),
-                           TensorSetConstantGPU(context, tensor, value));
-}
-
-template <typename T>
-__global__ void RowwiseAddKernel(const T* a, const T* b, T* c, int width,
-                                 int num) {
-  T tmp = 1.0 / width;
-  CUDA_KERNEL_LOOP(i, num) {
-    int h = i * tmp;
-    int w = i - h * width;
-    c[i] = a[i] + b[w];
-  }
-}
-
-template <typename T>
-struct RowwiseAdd<platform::CUDADeviceContext, T> {
-  void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& input,
-                  const framework::Tensor& vector, framework::Tensor* output) {
-    auto in_dims = input.dims();
-    auto out_dims = output->dims();
-    auto size = input.numel() / in_dims[0];
-    PADDLE_ENFORCE_EQ(
-        vector.numel(), size,
-        platform::errors::InvalidArgument(
-            "The input vector size"
-            " should be equal to the size of each row of input tensor."
-            " Expected vector size=%d, but received %d",
-            size, vector.numel()));
-    const char* in_dims_cstr = in_dims.to_str().c_str();
-    const char* out_dims_cstr = out_dims.to_str().c_str();
-    PADDLE_ENFORCE_EQ(
-        out_dims, in_dims,
-        platform::errors::InvalidArgument(
-            "The output tensor shape should be same as the input tensor"
-            " shape. Expected output tensor shape: %s,"
-            " but received %s",
-            in_dims_cstr, out_dims_cstr));
-    int blocks = 512;
-    int grids = (input.numel() + blocks - 1) / blocks;
-    RowwiseAddKernel<T><<<grids, blocks, 0, context.stream()>>>(
-        input.data<T>(), vector.data<T>(), output->data<T>(),
-        static_cast<int>(in_dims[1]), static_cast<int>(input.numel()));
-  }
-};
-
-template struct RowwiseAdd<platform::CUDADeviceContext, float>;
-template struct RowwiseAdd<platform::CUDADeviceContext, double>;
-template struct ColwiseSum<platform::CUDADeviceContext, float>;
-template struct ColwiseSum<platform::CUDADeviceContext, int>;
-template struct ColwiseSum<platform::CUDADeviceContext, int64_t>;
-// template struct ColwiseSum<platform::CUDADeviceContext, double>;
-// The ColwiseSum<platform::CUDADeviceContext, double> failed in debug mode,
-// and only failed for this case. So reimplemented it.
-template <>
-void ColwiseSum<platform::CUDADeviceContext, double>::operator()(
-    const platform::CUDADeviceContext& context, const framework::Tensor& input,
-    framework::Tensor* vector) {
-  auto in_dims = input.dims();
-  auto size = input.numel() / in_dims[0];
-  PADDLE_ENFORCE_EQ(vector->numel(), size,
-                    platform::errors::InvalidArgument(
-                        "The size of input vector"
-                        " should be equal to the size of input tensor column"
-                        " dimension. Expected vector size=%d, but received %d",
-                        size, vector->numel()));
-  framework::Tensor one;
-  one.mutable_data<double>({in_dims[0]}, context.GetPlace());
-  SetConstant<platform::CUDADeviceContext, double> set;
-  set(context, &one, static_cast<double>(1.0));
-  GetBlas<platform::CUDADeviceContext, double>(context).GEMV(
-      true, static_cast<int>(in_dims[0]), static_cast<int>(in_dims[1]), 1.0,
-      input.data<double>(), one.data<double>(), 0.0, vector->data<double>());
-}
-
-template struct RowwiseSum<platform::CUDADeviceContext, float>;
-// template struct RowwiseSum<platform::CUDADeviceContext, double>;
-// TODO(zcd): Following ColwiseSum format, need to confirm.
-// The RowwiseSum<platform::CUDADeviceContext, double> failed in debug mode,
-// and only failed for this case. So reimplemented it.
-template <>
-void RowwiseSum<platform::CUDADeviceContext, double>::operator()(
-    const platform::CUDADeviceContext& context, const framework::Tensor& input,
-    framework::Tensor* vector) {
-  auto in_dims = input.dims();
-  auto size = input.numel() / in_dims[0];
-  PADDLE_ENFORCE_EQ(vector->numel(), in_dims[0],
-                    platform::errors::InvalidArgument(
-                        "The size of input vector"
-                        " should be equal to the size of input tensor row"
-                        " dimension. Expected vector size=%d, but received %d",
-                        in_dims[0], vector->numel()));
-  framework::Tensor one;
-  one.mutable_data<double>({size}, context.GetPlace());
-  SetConstant<platform::CUDADeviceContext, double> set;
-  set(context, &one, static_cast<double>(1.0));
-  GetBlas<platform::CUDADeviceContext, double>(context).GEMV(
-      true, static_cast<int>(in_dims[1]), static_cast<int>(in_dims[0]), 1.0,
-      one.data<double>(), input.data<double>(), 0.0, vector->data<double>());
-}
-
-template struct RowwiseMean<platform::CUDADeviceContext, float>;
-template struct RowwiseMean<platform::CUDADeviceContext, double>;
-
-template <typename T>
-struct ElementwiseAddTo<platform::CUDADeviceContext, T> {
-  void operator()(platform::CUDADeviceContext* ctx,
-                  const framework::Tensor& src, framework::Tensor* dst) {
-    auto in = framework::EigenVector<T>::Flatten(src);
-    auto out = framework::EigenVector<T>::Flatten(*dst);
-    auto& place = *(ctx->eigen_device());
-    out.device(place) = out + in;
-  }
-};
-
-template struct ElementwiseAddTo<platform::CUDADeviceContext,
-                                 platform::float16>;
-
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/math/math_function.h b/paddle/fluid/operators/math/math_function.h
deleted file mode 100644
index 9dbbf455f1833..0000000000000
--- a/paddle/fluid/operators/math/math_function.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include <cmath>
-#include <memory>
-#include <vector>
-
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/pten/core/dense_tensor.h"
-
-namespace paddle {
-namespace operators {
-namespace math {
-
-template <typename DeviceContext, typename T>
-struct TransposeNormal {
-  // for dims >= 7 situation
-  void operator()(const DeviceContext& context, const framework::Tensor& in,
-                  framework::Tensor* out, const std::vector<int>& axis);
-};
-
-template <typename DeviceContext, typename T, int Rank>
-struct Transpose {
-  void operator()(const DeviceContext& context, const framework::Tensor& in,
-                  framework::Tensor* out, const std::vector<int>& axis);
-};
-
-template <typename DeviceContext, typename T>
-struct SetConstant {
-  void operator()(const DeviceContext& context, framework::Tensor* tensor,
-                  T num);
-};
-
-template <typename Place>
-void set_constant_with_place(const platform::DeviceContext& context,
-                             framework::Tensor* tensor, float value);
-
-void set_constant(const platform::DeviceContext& context,
-                  framework::Tensor* tensor, float value);
-
-template <typename DeviceContext, typename T>
-struct RowwiseAdd {
-  void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  const framework::Tensor& vec, framework::Tensor* output);
-};
-
-template <typename DeviceContext, typename T>
-struct ElementwiseAddTo {
-  // dst = dst + src
-  void operator()(DeviceContext* ctx, const framework::Tensor& src,
-                  framework::Tensor* dst);
-};
-
-template <typename DeviceContext, typename T>
-struct ColwiseSum {
-  void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  framework::Tensor* vec);
-};
-
-template <typename DeviceContext, typename T>
-struct RowwiseSum {
-  void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  framework::Tensor* vec);
-};
-
-template <typename DeviceContext, typename T>
-struct RowwiseMean {
-  void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  framework::Tensor* vec);
-};
-
-#ifdef PADDLE_WITH_XPU
-template <typename U>
-struct TensorSetConstantXPU {
-  TensorSetConstantXPU(framework::Tensor* tensor, U value,
-                       platform::Place place)
-      : tensor_(tensor), value_(value), place_(place) {}
-  template <typename T>
-  void apply() const {
-    auto* begin = tensor_->mutable_data<T>(place_);
-    int numel = tensor_->numel();
-    std::unique_ptr<T[]> data_cpu(new T[numel]);
-    std::fill(data_cpu.get(), data_cpu.get() + numel, static_cast<T>(value_));
-    memory::Copy(place_, begin, platform::CPUPlace(),
-                 static_cast<void*>(data_cpu.get()), numel * sizeof(T));
-  }
-  framework::Tensor* tensor_;
-  U value_;
-  platform::Place place_;
-};
-#endif
-
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/math/matrix_solve.cu.cc b/paddle/fluid/operators/math/matrix_solve.cu.cc
index 8aaac0295c818..ee6610eae1469 100644
--- a/paddle/fluid/operators/math/matrix_solve.cu.cc
+++ b/paddle/fluid/operators/math/matrix_solve.cu.cc
@@ -15,9 +15,9 @@ limitations under the License.
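// Every hunk from here on applies the same mechanical rewrite: include
// "paddle/pten/kernels/funcs/math_function.h" in place of the deleted fluid
// header and respell the functors under pten::funcs with unchanged template
// arguments. A minimal sketch of the new spelling (hedged: CPU context,
// float, and an illustrative shape, not code from this patch):
//
//   paddle::platform::CPUDeviceContext cpu_ctx;
//   paddle::framework::Tensor t;
//   t.mutable_data<float>(paddle::framework::make_ddim({2, 3}),
//                         paddle::platform::CPUPlace());
//   pten::funcs::SetConstant<paddle::platform::CPUDeviceContext, float> zero;
//   zero(cpu_ctx, &t, 0.0f);  // every element becomes 0.0f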
*/ #include "paddle/fluid/operators/math/matrix_solve.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/solve_op.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace platform { @@ -76,7 +76,7 @@ class MatrixSolveFunctor { const auto& new_dims_vec = getNewDimsVec(b_dims); tmp_b.Resize(framework::make_ddim(new_dims_vec)); tmp_b.mutable_data(context.GetPlace()); - math::TransposeNormal trans; + pten::funcs::TransposeNormal trans; std::vector new_axis = getNewAxis(b_rank); trans(context, b, &tmp_b, new_axis); @@ -149,7 +149,7 @@ class MatrixSolveFunctor { -host_info)); // transpose tmp_b to get the final result in row-major form. - math::TransposeNormal trans2; + pten::funcs::TransposeNormal trans2; trans2(context, tmp_b, out, new_axis); #else diff --git a/paddle/fluid/operators/math/prelu.h b/paddle/fluid/operators/math/prelu.h index 70aae2ba59e2c..24c8721656b88 100644 --- a/paddle/fluid/operators/math/prelu.h +++ b/paddle/fluid/operators/math/prelu.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/sample_prob.cu b/paddle/fluid/operators/math/sample_prob.cu index f596c1bc3dcf3..edc61bc667f5a 100644 --- a/paddle/fluid/operators/math/sample_prob.cu +++ b/paddle/fluid/operators/math/sample_prob.cu @@ -22,9 +22,9 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sample_prob.h" #include "paddle/fluid/operators/math/sampler.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/segment_pooling.cu b/paddle/fluid/operators/math/segment_pooling.cu index 0cbfaa4c5df7b..eaed2dc7d7e1d 100644 --- a/paddle/fluid/operators/math/segment_pooling.cu +++ b/paddle/fluid/operators/math/segment_pooling.cu @@ -14,10 +14,10 @@ limitations under the License. 
*/ #include #include "paddle/fluid/operators/gather.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/segment_pooling.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc index 8cd3e1367d86d..b921e844c9f21 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cc +++ b/paddle/fluid/operators/math/selected_rows_functor.cc @@ -129,7 +129,7 @@ struct SelectedRowsAddTensor { "But recieved input width = [%d], output width = [%d]", in1_row_numel, output->numel() / in1_height)); - SetConstant functor; + pten::funcs::SetConstant functor; functor(context, output, 0.0); auto* in1_data = in1_value.data(); @@ -461,7 +461,7 @@ struct MergeAdd { out.set_rows(merge_rows); - math::SetConstant constant_functor; + pten::funcs::SetConstant constant_functor; constant_functor(context, out.mutable_value(), static_cast(0.f)); std::unordered_map rows_to_id; @@ -689,7 +689,7 @@ struct MergeAverage { out.set_rows(merge_rows); - math::SetConstant constant_functor; + pten::funcs::SetConstant constant_functor; constant_functor(context, out.mutable_value(), 0.0); std::unordered_map rows_to_id; diff --git a/paddle/fluid/operators/math/selected_rows_functor.cu b/paddle/fluid/operators/math/selected_rows_functor.cu index 2ae2aaebb6c53..d2caf82c93a52 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cu +++ b/paddle/fluid/operators/math/selected_rows_functor.cu @@ -15,10 +15,10 @@ limitations under the License. */ #include #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -156,7 +156,7 @@ struct SelectedRowsAddTensor { auto* in2_data = input2.data(); auto* out_data = output->data(); - SetConstant functor; + pten::funcs::SetConstant functor; functor(context, output, static_cast(0)); const int block_size = 256; @@ -348,7 +348,7 @@ struct MergeAdd { {static_cast(merge_rows.size()), input_width}), context.GetPlace()); - math::SetConstant constant_functor; + pten::funcs::SetConstant constant_functor; constant_functor(context, out.mutable_value(), static_cast(0)); auto* out_data = out.mutable_value()->data(); @@ -411,7 +411,7 @@ struct MergeAdd { {static_cast(merge_rows.size()), input_width}), context.GetPlace()); - math::SetConstant constant_functor; + pten::funcs::SetConstant constant_functor; constant_functor(context, out.mutable_value(), static_cast(0)); auto* out_data = out.mutable_value()->data(); diff --git a/paddle/fluid/operators/math/selected_rows_functor.h b/paddle/fluid/operators/math/selected_rows_functor.h index 690082036c5e0..e0ac583f15b60 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.h +++ b/paddle/fluid/operators/math/selected_rows_functor.h @@ -19,8 +19,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define INLINE_FOR2(sizei, sizej) \ for (int64_t i = 0; i < sizei; i++) \ diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cc index 19e70f924f15e..9cb815e161173 100644 --- a/paddle/fluid/operators/math/selected_rows_functor_test.cc +++ b/paddle/fluid/operators/math/selected_rows_functor_test.cc @@ -15,14 +15,12 @@ limitations under the License. */ #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "gtest/gtest.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" TEST(selected_rows_functor, cpu_add) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -122,9 +120,7 @@ TEST(selected_rows_functor, cpu_add) { TEST(selected_rows_functor, cpu_add_to) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -221,9 +217,7 @@ TEST(selected_rows_functor, cpu_add_to) { TEST(selected_rows_functor, cpu_merge_average_float) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -260,9 +254,7 @@ TEST(selected_rows_functor, cpu_merge_average_float) { TEST(selected_rows_functor, cpu_merge_add_float) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -301,8 +293,7 @@ TEST(selected_rows_functor, cpu_merge_add_float) { TEST(selected_rows_functor, cpu_merge_add_int) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -341,9 +332,7 @@ TEST(selected_rows_functor, cpu_merge_add_int) { TEST(selected_rows_functor, cpu_merge_add_multi) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - set_const; + pten::funcs::SetConstant set_const; int64_t height = 10; int64_t row_numel = 8; @@ -397,9 +386,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi) { TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - set_const; + pten::funcs::SetConstant set_const; int64_t height = 10; int64_t row_numel = 8; @@ -459,9 +446,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) { TEST(selected_rows_functor, cpu_sum_to) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height 
= 10; int64_t row_numel = 10; std::vector rows1{0, 4, 7}; diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc index e826c2a7244f7..1bae95e15840c 100644 --- a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc +++ b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "gtest/gtest.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" TEST(selected_rows_functor, gpu_add) { paddle::platform::CUDAPlace gpu_place(0); @@ -22,9 +22,7 @@ TEST(selected_rows_functor, gpu_add) { paddle::platform::CUDADeviceContext& ctx = *reinterpret_cast( paddle::platform::DeviceContextPool::Instance().Get(gpu_place)); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -144,9 +142,7 @@ TEST(selected_rows_functor, gpu_add_to) { paddle::platform::CUDADeviceContext& ctx = *reinterpret_cast( paddle::platform::DeviceContextPool::Instance().Get(gpu_place)); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -254,8 +250,7 @@ TEST(selected_rows_functor, gpu_merge_add) { paddle::platform::CUDADeviceContext& ctx = *reinterpret_cast( paddle::platform::DeviceContextPool::Instance().Get(gpu_place)); - paddle::operators::math::SetConstant + pten::funcs::SetConstant set_const; int64_t height = 10; diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index 2eee4d0a6c14e..22cd435297341 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -16,8 +16,8 @@ limitations under the License. */ #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence_pooling.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -191,7 +191,7 @@ class MaxSeqPoolGradFunctor { const int* max_index = index.data(); T* ig_data = in_grad->data(); - SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(context, in_grad, static_cast(0.0)); int64_t num_seq = og_dims[0]; int64_t dim = out_grad.numel() / num_seq; @@ -409,7 +409,7 @@ class SequencePoolGradFunctor { if (pooltype == "LAST" || pooltype == "FIRST") { // set X@Grad be zero at first when pooltype is LAST/FIRST - math::SetConstant functor; + pten::funcs::SetConstant functor; functor(context, in_grad, 0); } diff --git a/paddle/fluid/operators/math/sequence_pooling.cu b/paddle/fluid/operators/math/sequence_pooling.cu index b3e1922e10657..3bf3b483e8905 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cu +++ b/paddle/fluid/operators/math/sequence_pooling.cu @@ -14,10 +14,10 @@ limitations under the License. 
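// The test updates above are where the move is most visible: the old
// multi-line paddle::operators::math::SetConstant declaration collapses to a
// single line and is used exactly as before, e.g.
//
//   pten::funcs::SetConstant<paddle::platform::CPUDeviceContext, float> functor;
//   functor(ctx, in_value, 1.0);  // fill a test input with ones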
*/ #include #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence_pooling.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/macros.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/softmax.cu b/paddle/fluid/operators/math/softmax.cu index bc32e068f566d..632fc1d4b29fe 100644 --- a/paddle/fluid/operators/math/softmax.cu +++ b/paddle/fluid/operators/math/softmax.cu @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/softmax.h" #include "paddle/fluid/operators/math/softmax_impl.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/sparse_impl.cu.h b/paddle/fluid/operators/math/sparse_impl.cu.h index 8ff2f4b27df43..728cf0fcd0b0f 100644 --- a/paddle/fluid/operators/math/sparse_impl.cu.h +++ b/paddle/fluid/operators/math/sparse_impl.cu.h @@ -14,8 +14,8 @@ #pragma once -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/dynload/cusparse.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" diff --git a/paddle/fluid/operators/math/tree2col.cc b/paddle/fluid/operators/math/tree2col.cc index af5df27207ace..85d71b369a153 100644 --- a/paddle/fluid/operators/math/tree2col.cc +++ b/paddle/fluid/operators/math/tree2col.cc @@ -91,7 +91,7 @@ class Tree2ColFunctor { std::vector> tr; auto feature_dims = node_features.dims(); auto cpu_place = context.GetPlace(); - math::SetConstant constant; + pten::funcs::SetConstant constant; int64_t feature_size = feature_dims[1]; size_t patch_elem_size = 3 * static_cast(feature_size); size_t node_count = 0, patch_count = 0, patch_size; @@ -144,7 +144,7 @@ class Col2TreeFunctor { std::vector> tr; auto output_dims = out_grad.dims(); auto cpu_place = context.GetPlace(); - math::SetConstant constant; + pten::funcs::SetConstant constant; int64_t output_size = output_dims[1]; size_t grad_elem_size = 3 * static_cast(output_size); size_t node_count = 0, grad_count = 0; diff --git a/paddle/fluid/operators/math/tree2col.cu b/paddle/fluid/operators/math/tree2col.cu index 4f3ab31916558..4fcd1a1cf6b3e 100644 --- a/paddle/fluid/operators/math/tree2col.cu +++ b/paddle/fluid/operators/math/tree2col.cu @@ -13,8 +13,8 @@ // limitations under the License. 
#include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/tree2col.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -56,7 +56,7 @@ class Tree2ColFunctor { auto cpu_place = platform::CPUPlace(); auto stream = context.stream(); auto feature_dims = node_features.dims(); - math::SetConstant constant; + pten::funcs::SetConstant constant; Tensor EdgeSet_cpu; framework::TensorCopy(EdgeSet, cpu_place, &EdgeSet_cpu); @@ -128,7 +128,7 @@ class Col2TreeFunctor { auto cpu_place = platform::CPUPlace(); auto stream = context.stream(); auto output_dims = patch_grad.dims(); - math::SetConstant constant; + pten::funcs::SetConstant constant; Tensor EdgeSet_cpu; framework::TensorCopy(EdgeSet, cpu_place, &EdgeSet_cpu); diff --git a/paddle/fluid/operators/math/tree2col.h b/paddle/fluid/operators/math/tree2col.h index 632777c9cd961..5cf7a93f4d4a1 100644 --- a/paddle/fluid/operators/math/tree2col.h +++ b/paddle/fluid/operators/math/tree2col.h @@ -18,7 +18,7 @@ #include #include #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 717c1b5c0ed15..6b24f4778442b 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -128,7 +128,7 @@ static framework::Tensor FoldHeadAndLastDims(const DeviceContext &context, output.Resize({in_dims[1], in_dims[0], in_dims[2]}); output.mutable_data(context.GetPlace()); std::vector axis = {1, 0, 2}; - math::Transpose trans; + pten::funcs::Transpose trans; trans(context, input, &output, axis); output.Resize({in_dims[1], in_dims[0] * in_dims[2]}); diff --git a/paddle/fluid/operators/matrix_power_op.h b/paddle/fluid/operators/matrix_power_op.h index 6c4b8860bf8c6..93755b22bf93a 100644 --- a/paddle/fluid/operators/matrix_power_op.h +++ b/paddle/fluid/operators/matrix_power_op.h @@ -170,7 +170,7 @@ void MatrixPowerGradFunction(const Tensor* X, const Tensor* Out, if (n == 0) { // \nabla X = O - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, dX, static_cast(0)); return; } else if (n == 1) { diff --git a/paddle/fluid/operators/matrix_rank_op.cu b/paddle/fluid/operators/matrix_rank_op.cu index 7362d00afb76f..d974d7c1b78f1 100644 --- a/paddle/fluid/operators/matrix_rank_op.cu +++ b/paddle/fluid/operators/matrix_rank_op.cu @@ -19,11 +19,11 @@ limitations under the License. */ #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/operators/math/complex_functors.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/matrix_rank_op.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/platform/dynload/cusolver.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/maxout_op.h b/paddle/fluid/operators/maxout_op.h index 64b538fc5d5bd..d1c229342b961 100644 --- a/paddle/fluid/operators/maxout_op.h +++ b/paddle/fluid/operators/maxout_op.h @@ -15,8 +15,8 @@ limitations under the License. 
*/ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/maxouting.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -57,7 +57,7 @@ class MaxOutGradKernel : public framework::OpKernel { } auto& device_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; if (in_x_grad) { in_x_grad->mutable_data(context.GetPlace()); zero(device_ctx, in_x_grad, static_cast(0.0)); diff --git a/paddle/fluid/operators/mean_iou_op.cu b/paddle/fluid/operators/mean_iou_op.cu index 79aff52a16fa9..48b34e18b8f3f 100644 --- a/paddle/fluid/operators/mean_iou_op.cu +++ b/paddle/fluid/operators/mean_iou_op.cu @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/mean_iou_op.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mlu/activation_op_mlu_test.cc b/paddle/fluid/operators/mlu/activation_op_mlu_test.cc index 9da3a4c48728e..555179e7cd11d 100644 --- a/paddle/fluid/operators/mlu/activation_op_mlu_test.cc +++ b/paddle/fluid/operators/mlu/activation_op_mlu_test.cc @@ -15,9 +15,9 @@ limitations under the License. */ #include #include "paddle/fluid/operators/activation_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/mlu/device_context.h" #include "paddle/fluid/platform/place.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace fw = paddle::framework; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/mul_op.h b/paddle/fluid/operators/mul_op.h index 0fb32cf4be886..6ea154c25db5d 100644 --- a/paddle/fluid/operators/mul_op.h +++ b/paddle/fluid/operators/mul_op.h @@ -17,7 +17,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/norm_utils.cu.h b/paddle/fluid/operators/norm_utils.cu.h index 241c634e3fc98..562fe8a1bc819 100644 --- a/paddle/fluid/operators/norm_utils.cu.h +++ b/paddle/fluid/operators/norm_utils.cu.h @@ -25,8 +25,8 @@ limitations under the License. */ namespace cub = hipcub; #endif #include "paddle/fluid/framework/data_layout.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/pten/kernels/funcs/math_function.h" #ifdef __HIPCC__ #define LAUNCH_BOUNDS(BlockDim) __launch_bounds__(BlockDim) @@ -405,7 +405,7 @@ void NormDoubleGradFunctor(const framework::ExecutionContext &ctx, const T *ddbias_data = (ddBias == nullptr ? nullptr : ddBias->data()); auto &dev_ctx = ctx.template device_context(); - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; auto &x_dims = X->dims(); const int C = (data_layout == DataLayout::kNCHW ? 
x_dims[1] diff --git a/paddle/fluid/operators/one_hot_op.cu b/paddle/fluid/operators/one_hot_op.cu index 2b021748048c7..092ffe78f5760 100644 --- a/paddle/fluid/operators/one_hot_op.cu +++ b/paddle/fluid/operators/one_hot_op.cu @@ -46,7 +46,7 @@ struct OneHotOpCUDAFunctor { auto numel = in_->numel(); auto* p_out_data = out_->mutable_data(ctx_.GetPlace()); auto stream = ctx_.stream(); - math::set_constant(ctx_, out_, 0.0); + pten::funcs::set_constant(ctx_, out_, 0.0); FillOutputKernel<<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, diff --git a/paddle/fluid/operators/one_hot_op.h b/paddle/fluid/operators/one_hot_op.h index e671a1e99e7f0..a5b3ff78e1472 100644 --- a/paddle/fluid/operators/one_hot_op.h +++ b/paddle/fluid/operators/one_hot_op.h @@ -14,7 +14,7 @@ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -41,7 +41,7 @@ struct OneHotOpFunctor { auto* p_in_data = in_->data(); auto numel = in_->numel(); auto* p_out_data = out_->mutable_data(ctx_.GetPlace()); - math::set_constant(ctx_, out_, 0.0); + pten::funcs::set_constant(ctx_, out_, 0.0); if (allow_out_of_range_) { for (int i = 0; i < numel; ++i) { diff --git a/paddle/fluid/operators/one_hot_v2_op.cu b/paddle/fluid/operators/one_hot_v2_op.cu index 115c946084683..d145455a1f1e5 100644 --- a/paddle/fluid/operators/one_hot_v2_op.cu +++ b/paddle/fluid/operators/one_hot_v2_op.cu @@ -47,7 +47,7 @@ struct OneHotV2OpCUDAFunctor { auto numel = in_->numel(); auto* p_out_data = out_->mutable_data(ctx_.GetPlace()); auto stream = ctx_.stream(); - math::set_constant(ctx_, out_, 0.0); + pten::funcs::set_constant(ctx_, out_, 0.0); FillOutputKernel<<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, diff --git a/paddle/fluid/operators/one_hot_v2_op.h b/paddle/fluid/operators/one_hot_v2_op.h index 221b8cf0e2ab8..c95909e3753d7 100644 --- a/paddle/fluid/operators/one_hot_v2_op.h +++ b/paddle/fluid/operators/one_hot_v2_op.h @@ -14,7 +14,7 @@ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -41,7 +41,7 @@ struct OneHotV2OpFunctor { auto* p_in_data = in_->data(); auto numel = in_->numel(); auto* p_out_data = out_->mutable_data(ctx_.GetPlace()); - math::set_constant(ctx_, out_, 0.0); + pten::funcs::set_constant(ctx_, out_, 0.0); if (allow_out_of_range_) { for (int i = 0; i < numel; ++i) { diff --git a/paddle/fluid/operators/optimizers/adagrad_op.cc b/paddle/fluid/operators/optimizers/adagrad_op.cc index 31d3e1208dadb..d865f7cff22e0 100644 --- a/paddle/fluid/operators/optimizers/adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/adagrad_op.cc @@ -17,8 +17,8 @@ limitations under the License. 
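// The one_hot kernels above call the untemplated pten::funcs::set_constant,
// which dispatches on the tensor's runtime dtype, so zeroing the output needs
// no explicit element type:
//
//   pten::funcs::set_constant(ctx_, out_, 0.0);  // zero the whole output
//   // ...after which each row i gets a 1 written at column p_in_data[i].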
*/ #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/optimizers/adagrad_op.cu b/paddle/fluid/operators/optimizers/adagrad_op.cu index a7c32255bd1ee..5c970ceffb022 100644 --- a/paddle/fluid/operators/optimizers/adagrad_op.cu +++ b/paddle/fluid/operators/optimizers/adagrad_op.cu @@ -11,10 +11,10 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "paddle/fluid/operators/optimizers/adagrad_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/overlap_add_op.h b/paddle/fluid/operators/overlap_add_op.h index 865659ee942e4..b69f99bc985c7 100644 --- a/paddle/fluid/operators/overlap_add_op.h +++ b/paddle/fluid/operators/overlap_add_op.h @@ -18,11 +18,11 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/seq2col.h" #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/p_norm_op.cu b/paddle/fluid/operators/p_norm_op.cu index abbbffb6331f5..77159a7a08bfd 100644 --- a/paddle/fluid/operators/p_norm_op.cu +++ b/paddle/fluid/operators/p_norm_op.cu @@ -181,7 +181,7 @@ class PnormGradCUDAKernel : public framework::OpKernel { auto& cuda_ctx = ctx.template device_context(); if (porder == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(cuda_ctx, out_dx, static_cast(0)); } else if (porder == INFINITY || porder == -INFINITY) { AbsMaxAndMinGradFunctor functor; diff --git a/paddle/fluid/operators/p_norm_op.h b/paddle/fluid/operators/p_norm_op.h index 8fca6924a2541..17d1240636f0f 100644 --- a/paddle/fluid/operators/p_norm_op.h +++ b/paddle/fluid/operators/p_norm_op.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -119,7 +119,7 @@ class PnormGradKernel : public framework::OpKernel { Eigen::DSizes bcast(1, n, 1); if (porder == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); set_zero(dev_ctx, out_dx, static_cast(0)); } else if (porder == INFINITY || porder == -INFINITY) { diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index e50af02dcc4e0..3663cb954092c 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
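// p_norm's backward above keeps its porder == 0 special case: the
// zero-"norm" has an identically zero gradient, so dX is simply filled with
// zeros rather than computed:
//
//   pten::funcs::SetConstant<DeviceContext, T> set_zero;
//   set_zero(dev_ctx, out_dx, static_cast<T>(0));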
*/ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -415,7 +415,7 @@ class Pad2dGradCPUKernel : public framework::OpKernel { auto d_out_dims = d_out->dims(); const T* d_out_data = d_out->data(); T* d_in_data = d_in->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(context.template device_context(), d_in, static_cast(0)); const int pad_top = pads[0]; diff --git a/paddle/fluid/operators/pad2d_op.cu b/paddle/fluid/operators/pad2d_op.cu index a854fa6091ab4..0c9e6ed2b7257 100644 --- a/paddle/fluid/operators/pad2d_op.cu +++ b/paddle/fluid/operators/pad2d_op.cu @@ -14,9 +14,9 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -395,7 +395,7 @@ class Pad2dGradCUDAKernel : public framework::OpKernel { const T* d_out_data = d_out->data(); T* d_in_data = d_in->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(context.template device_context(), d_in, static_cast(0)); diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index e84b5a9d9baae..e29718af894f1 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -621,7 +621,7 @@ class Pad3dGradCPUKernel : public framework::OpKernel { auto d_out_dims = d_out->dims(); const T* d_out_data = d_out->data(); T* d_in_data = d_in->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(context.template device_context(), d_in, static_cast(0)); const int pad_left = pads[0]; diff --git a/paddle/fluid/operators/pad3d_op.cu b/paddle/fluid/operators/pad3d_op.cu index 1567251236550..b7cf1be99fe14 100644 --- a/paddle/fluid/operators/pad3d_op.cu +++ b/paddle/fluid/operators/pad3d_op.cu @@ -14,9 +14,9 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -697,7 +697,7 @@ class Pad3dGradCUDAKernel : public framework::OpKernel { const T* d_out_data = d_out->data(); T* d_in_data = d_in->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(context.template device_context(), d_in, static_cast(0)); diff --git a/paddle/fluid/operators/pixel_shuffle_op.h b/paddle/fluid/operators/pixel_shuffle_op.h index b2a0db0f838d5..4ae138ac7af34 100644 --- a/paddle/fluid/operators/pixel_shuffle_op.h +++ b/paddle/fluid/operators/pixel_shuffle_op.h @@ -14,7 +14,7 @@ limitations under the License. 
*/ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -52,7 +52,7 @@ class PixelShuffleOpKernel : public framework::OpKernel { } else { o.Resize({in_dims[0], in_dims[1], factor, in_dims[2], factor, o_dims[3]}); } - math::Transpose trans; + pten::funcs::Transpose trans; auto& dev_ctx = ctx.template device_context(); trans(dev_ctx, t, &o, axis); out->Resize(o_dims); @@ -95,7 +95,7 @@ class PixelShuffleGradOpKernel : public framework::OpKernel { o.Resize( {do_dims[0], dx_dims[1], dx_dims[2], do_dims[3], factor, factor}); } - math::Transpose trans; + pten::funcs::Transpose trans; auto& dev_ctx = ctx.template device_context(); trans(dev_ctx, t, &o, axis); dx->Resize(dx_dims); diff --git a/paddle/fluid/operators/poisson_op.h b/paddle/fluid/operators/poisson_op.h index 2159637b290c9..d2deb21567161 100644 --- a/paddle/fluid/operators/poisson_op.h +++ b/paddle/fluid/operators/poisson_op.h @@ -17,7 +17,7 @@ #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -31,7 +31,7 @@ class PoissonGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto& dev_ctx = ctx.template device_context(); functor(dev_ctx, dx, static_cast(0)); } diff --git a/paddle/fluid/operators/pool_cudnn_op.cu.cc b/paddle/fluid/operators/pool_cudnn_op.cu.cc index bbe3174012947..2b0300b87c268 100644 --- a/paddle/fluid/operators/pool_cudnn_op.cu.cc +++ b/paddle/fluid/operators/pool_cudnn_op.cu.cc @@ -14,8 +14,8 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/pool_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_HIP #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/operator.h" @@ -114,7 +114,7 @@ class PoolCUDNNOpKernel : public framework::OpKernel { transformed_input.Resize(framework::make_ddim(in_dims_vec)); transformed_input.mutable_data(ctx.GetPlace(), input->type()); - math::Transpose trans5; + pten::funcs::Transpose trans5; trans5(dev_ctx, *input, &transformed_input, axis); // output @@ -142,7 +142,7 @@ class PoolCUDNNOpKernel : public framework::OpKernel { transformed_input.Resize(framework::make_ddim(in_dims_vec)); transformed_input.mutable_data(ctx.GetPlace(), input->type()); - math::Transpose trans; + pten::funcs::Transpose trans; trans(dev_ctx, *input, &transformed_input, axis); transformed_output.Resize(output->dims()); @@ -221,7 +221,8 @@ class PoolCUDNNOpKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); std::vector axis{0, 2, 3, 4, 1}; - math::Transpose trans5_v2; + pten::funcs::Transpose + trans5_v2; trans5_v2(dev_ctx, transformed_output, output, axis); } #ifdef PADDLE_WITH_HIP @@ -230,7 +231,7 @@ class PoolCUDNNOpKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); std::vector axis{0, 2, 3, 1}; - math::Transpose trans; + pten::funcs::Transpose trans; trans(dev_ctx, transformed_output, output, axis); } #endif @@ -337,7 +338,7 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { transformed_input.Resize(framework::make_ddim(in_dims_vec)); transformed_input.mutable_data(ctx.GetPlace(), input->type()); - math::Transpose trans5; + pten::funcs::Transpose trans5; trans5(dev_ctx, *input, &transformed_input, axis); // output @@ -351,14 +352,16 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { transformed_output.mutable_data(ctx.GetPlace(), output->type()); - math::Transpose trans5_v2; + pten::funcs::Transpose + trans5_v2; trans5_v2(dev_ctx, *output, &transformed_output, axis); // output grad transformed_output_grad.Resize(framework::make_ddim(out_dims_vec)); transformed_output_grad.mutable_data(ctx.GetPlace(), output_grad->type()); - math::Transpose trans5_v3; + pten::funcs::Transpose + trans5_v3; trans5_v3(dev_ctx, *output_grad, &transformed_output_grad, axis); // input grad @@ -381,7 +384,7 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { transformed_input.Resize(framework::make_ddim(in_dims_vec)); transformed_input.mutable_data(ctx.GetPlace(), input->type()); - math::Transpose trans4; + pten::funcs::Transpose trans4; trans4(dev_ctx, *input, &transformed_input, axis); // output @@ -394,14 +397,16 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { transformed_output.mutable_data(ctx.GetPlace(), output->type()); - math::Transpose trans4_v2; + pten::funcs::Transpose + trans4_v2; trans4_v2(dev_ctx, *output, &transformed_output, axis); // output grad transformed_output_grad.Resize(framework::make_ddim(out_dims_vec)); transformed_output_grad.mutable_data(ctx.GetPlace(), output_grad->type()); - math::Transpose trans4_v3; + pten::funcs::Transpose + trans4_v3; trans4_v3(dev_ctx, *output_grad, &transformed_output_grad, axis); // input grad @@ -485,7 +490,8 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); std::vector axis{0, 2, 3, 4, 1}; - math::Transpose trans5_v4; + 
pten::funcs::Transpose + trans5_v4; trans5_v4(dev_ctx, transformed_input_grad, input_grad, axis); } #ifdef PADDLE_WITH_HIP @@ -494,7 +500,8 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); std::vector axis{0, 2, 3, 1}; - math::Transpose trans4_v4; + pten::funcs::Transpose + trans4_v4; trans4_v4(dev_ctx, transformed_input_grad, input_grad, axis); } #endif diff --git a/paddle/fluid/operators/pool_op.h b/paddle/fluid/operators/pool_op.h index 9e2f6cf223b08..d220b13d18dc2 100644 --- a/paddle/fluid/operators/pool_op.h +++ b/paddle/fluid/operators/pool_op.h @@ -20,8 +20,8 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" +#include "paddle/pten/kernels/funcs/math_function.h" #if defined(__HIPCC__) || defined(__NVCC__) #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h" #endif @@ -299,7 +299,7 @@ class PoolGradKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); if (in_x_grad) { in_x_grad->mutable_data(context.GetPlace()); - paddle::operators::math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; set_constant(dev_ctx, in_x_grad, static_cast(0.0)); switch (ksize.size()) { diff --git a/paddle/fluid/operators/pool_with_index_op.h b/paddle/fluid/operators/pool_with_index_op.h index 065d90704cf77..d039598a8a04e 100644 --- a/paddle/fluid/operators/pool_with_index_op.h +++ b/paddle/fluid/operators/pool_with_index_op.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -92,7 +92,7 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel { if (in_x_grad) { in_x_grad->mutable_data(context.GetPlace()); auto& device_ctx = context.template device_context(); - math::set_constant(device_ctx, in_x_grad, 0); + pten::funcs::set_constant(device_ctx, in_x_grad, 0); switch (ksize.size()) { case 2: { diff --git a/paddle/fluid/operators/prroi_pool_op.cu b/paddle/fluid/operators/prroi_pool_op.cu index 71aaf08c5256a..256bc0473b466 100644 --- a/paddle/fluid/operators/prroi_pool_op.cu +++ b/paddle/fluid/operators/prroi_pool_op.cu @@ -327,7 +327,7 @@ class GPUPRROIPoolGradOpKernel : public framework::OpKernel { dev_ctx.stream()); input_grad->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(ctx.cuda_device_context(), input_grad, static_cast(0)); input_roi_grad->mutable_data(ctx.GetPlace()); set_zero(ctx.cuda_device_context(), input_roi_grad, static_cast(0)); diff --git a/paddle/fluid/operators/prroi_pool_op.h b/paddle/fluid/operators/prroi_pool_op.h index 38f8d6542ac32..63f0047aa954c 100644 --- a/paddle/fluid/operators/prroi_pool_op.h +++ b/paddle/fluid/operators/prroi_pool_op.h @@ -15,7 +15,7 @@ limitations under the License. 
*/ #pragma once #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #if defined(__NVCC__) || defined(__HIPCC__) #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #endif @@ -500,7 +500,7 @@ class CPUPRROIPoolGradOpKernel : public framework::OpKernel { input_grad->mutable_data(ctx.GetPlace()); input_roi_grad->mutable_data(ctx.GetPlace()); // set gradient of X to be 0. before backpropagate. - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(ctx.template device_context(), input_grad, static_cast(0)); set_zero(ctx.template device_context(), input_roi_grad, diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc index 277c93fad6aa8..15b1aab855135 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc @@ -13,8 +13,8 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/pscore/distributed_lookup_table_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h index d715bf34a49ef..af423f71b0d7c 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h @@ -18,7 +18,7 @@ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc index 3a1e2ea78619b..b481235956d20 100644 --- a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc +++ b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc @@ -13,8 +13,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/pscore/distributed_push_sparse_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/pscore/distributed_push_sparse_op.h b/paddle/fluid/operators/pscore/distributed_push_sparse_op.h index f19ba5f2e41da..c07ffa4bd0e0a 100644 --- a/paddle/fluid/operators/pscore/distributed_push_sparse_op.h +++ b/paddle/fluid/operators/pscore/distributed_push_sparse_op.h @@ -18,7 +18,7 @@ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/pscore/fake_init_op.cc b/paddle/fluid/operators/pscore/fake_init_op.cc index b3a745fc99538..d337aa8b0102c 100644 --- a/paddle/fluid/operators/pscore/fake_init_op.cc +++ b/paddle/fluid/operators/pscore/fake_init_op.cc @@ -11,7 +11,7 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/psroi_pool_op.cu b/paddle/fluid/operators/psroi_pool_op.cu index efdcc59a5c49e..9bca5d86d4a08 100644 --- a/paddle/fluid/operators/psroi_pool_op.cu +++ b/paddle/fluid/operators/psroi_pool_op.cu @@ -317,7 +317,7 @@ class GPUPSROIPoolGradOpKernel : public framework::OpKernel { ctx.device_context(), &rois_batch_id_list_gpu); input_grad->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(ctx.cuda_device_context(), input_grad, static_cast(0)); int output_grad_size = output_grad->numel(); diff --git a/paddle/fluid/operators/psroi_pool_op.h b/paddle/fluid/operators/psroi_pool_op.h index 4d7e9ce295fc8..ed5221648fdff 100644 --- a/paddle/fluid/operators/psroi_pool_op.h +++ b/paddle/fluid/operators/psroi_pool_op.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -225,7 +225,7 @@ class CPUPSROIPoolGradOpKernel : public framework::OpKernel { T* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); // set gradient of X to be 0. before backpropagate. - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(ctx.template device_context(), input_grad, static_cast(0)); diff --git a/paddle/fluid/operators/put_along_axis_op.cu b/paddle/fluid/operators/put_along_axis_op.cu index da36b564337da..800da8a275c2d 100644 --- a/paddle/fluid/operators/put_along_axis_op.cu +++ b/paddle/fluid/operators/put_along_axis_op.cu @@ -17,8 +17,8 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/put_along_axis_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/put_along_axis_op.h b/paddle/fluid/operators/put_along_axis_op.h index f23ca177db9c5..0b4481ceacf73 100644 --- a/paddle/fluid/operators/put_along_axis_op.h +++ b/paddle/fluid/operators/put_along_axis_op.h @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/gather_scatter_kernel.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/qr_op.h b/paddle/fluid/operators/qr_op.h index 1731aa9e07206..c55619a4f76e7 100644 --- a/paddle/fluid/operators/qr_op.h +++ b/paddle/fluid/operators/qr_op.h @@ -142,7 +142,7 @@ class QrGradKernel : public framework::OpKernel { *ctx.Output(framework::GradVarName("X")); dA.mutable_data>(ctx.GetPlace()); auto& dev_ctx = ctx.template device_context(); - math::SetConstant()(dev_ctx, &dA, T(0)); + pten::funcs::SetConstant()(dev_ctx, &dA, T(0)); auto dito = math::DeviceIndependenceTensorOperations(ctx); diff --git a/paddle/fluid/operators/range_op.h b/paddle/fluid/operators/range_op.h index 5344147a9069c..aca9d50c32738 100644 --- a/paddle/fluid/operators/range_op.h +++ b/paddle/fluid/operators/range_op.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/range_op_npu_test.cc b/paddle/fluid/operators/range_op_npu_test.cc index 081cafdf67b99..00486dbed8bf2 100644 --- a/paddle/fluid/operators/range_op_npu_test.cc +++ b/paddle/fluid/operators/range_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(range); USE_OP_DEVICE_KERNEL(range, NPU); diff --git a/paddle/fluid/operators/rank_attention.cu.h b/paddle/fluid/operators/rank_attention.cu.h index 8ec138c8824fa..3eb4d8401ab26 100644 --- a/paddle/fluid/operators/rank_attention.cu.h +++ b/paddle/fluid/operators/rank_attention.cu.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/dim.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc index 1eeeb5e1f8aa1..f8ed44267e931 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc @@ -27,12 +27,11 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; using Tensor = paddle::framework::Tensor; diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index 3b8ea60963d62..cc17453b9d839 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -21,8 +21,8 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/cast_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/reduce_ops/reduce_op_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" // only can include the headers in paddle/pten/api dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" @@ -102,7 +102,7 @@ void GetShuffledInput(const framework::ExecutionContext& context, shuffled_input->Resize(shuffled_dims); shuffled_input->mutable_data(context.GetPlace()); - math::TransposeNormal trans; + pten::funcs::TransposeNormal trans; trans(context.template device_context(), *input, shuffled_input, perm_axis); } @@ -166,7 +166,7 @@ void HandleLargeDimGrad(const framework::ExecutionContext& context, framework::TensorCopy(*dx, context.GetPlace(), &dx_tmp); dx_tmp.Resize(shuffled_dim); dx->Resize(x_dim); - math::TransposeNormal trans; + pten::funcs::TransposeNormal trans; trans(context.template device_context(), dx_tmp, dx, origin_axis); } diff --git a/paddle/fluid/operators/repeat_interleave_op.h b/paddle/fluid/operators/repeat_interleave_op.h index 1a38b0271dd07..ca861696d719e 100644 --- a/paddle/fluid/operators/repeat_interleave_op.h +++ b/paddle/fluid/operators/repeat_interleave_op.h @@ -16,7 +16,7 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/index_select_op.h" namespace paddle { diff --git a/paddle/fluid/operators/rnn_op.cu.cc b/paddle/fluid/operators/rnn_op.cu.cc index 80a0ef10fa150..94becaa43f002 100644 --- a/paddle/fluid/operators/rnn_op.cu.cc +++ b/paddle/fluid/operators/rnn_op.cu.cc @@ -14,9 +14,9 @@ limitations under the License. */ #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -719,7 +719,7 @@ class RNNGradCudnnKernel : public framework::OpKernel { } Tensor weight_grad; - math::SetConstant zero; + pten::funcs::SetConstant zero; weight_grad.mutable_data({weight_numel}, ctx.GetPlace()); zero(dev_ctx, &weight_grad, static_cast(0.0)); T *weight_grad_data = weight_grad.data(); diff --git a/paddle/fluid/operators/rnn_op.h b/paddle/fluid/operators/rnn_op.h index 5e19be5e4cfe1..b2c1b8b9895d3 100644 --- a/paddle/fluid/operators/rnn_op.h +++ b/paddle/fluid/operators/rnn_op.h @@ -25,9 +25,9 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/math/fc.h" #include "paddle/fluid/operators/math/gru_compute.h" #include "paddle/fluid/operators/math/lstm_compute.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/unique_op.h" #include "paddle/fluid/operators/utils.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -305,7 +305,7 @@ struct Layer { framework::TensorCopy(bias_hh, context.GetPlace(), dev_ctx, &bias_hh_tmp); bias_hh_tmp.Resize({3, bias_hh_tmp.numel() / 3}); auto bias_hh_tmp_unbind = Unbind(bias_hh_tmp); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, &bias_hh_tmp_unbind[2], static_cast(0.0)); auto bias_hh_after_mask = framework::EigenMatrix::From( @@ -439,7 +439,7 @@ struct Layer { &weight_hh_tmp); weight_hh_tmp.Resize({3, weight_hh_tmp.numel() / 3}); auto weight_hh_tmp_unbind = Unbind(weight_hh_tmp); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, &weight_hh_tmp_unbind[2], static_cast(0.0)); weight_hh_tmp.Resize(vec[1 + offset * 4].dims()); } @@ -585,7 +585,7 @@ struct Layer { &weight_hh_tmp); weight_hh_tmp.Resize({3, weight_hh_tmp.numel() / 3}); auto weight_hh_tmp_unbind = Unbind(weight_hh_tmp); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, &weight_hh_tmp_unbind[2], static_cast(0.0)); weight_hh_tmp.Resize(vec[1 + offset * 4].dims()); } @@ -966,7 +966,7 @@ class RNNCPUKernel : public framework::OpKernel { dropout_mask->mutable_data(output->dims(), ctx.GetPlace()); auto& dev_ctx = ctx.template device_context(); - math::SetConstant ones; + pten::funcs::SetConstant ones; ones(dev_ctx, dropout_mask, static_cast(1)); // init the output and allocate the memory output->mutable_data(ctx.GetPlace()); @@ -1095,7 +1095,7 @@ struct GradLayer { Tensor c, d; Tensor* dynamic_grad_pre_h = &c; Tensor* dynamic_grad_pre_c = &d; - math::SetConstant zero; + pten::funcs::SetConstant zero; if (init_h_grad_unbind->size() > 0) { dynamic_grad_pre_h->ShareDataWith( (*init_h_grad_unbind)[current_layer_idx]); @@ -1293,7 +1293,7 @@ struct GradLayer { mat_dim_parameter, static_cast(1.0), input_grad, T(1)); // calc the gradient of Bias_hi, Bias_hh - math::ColwiseSum col_sum; + pten::funcs::ColwiseSum col_sum; Tensor tmp_grad_gate; tmp_grad_gate.ShareDataWith(grad_gate); tmp_grad_gate.Resize( @@ -1328,7 +1328,7 @@ struct SingleGradLayer : GradLayer { const int& gate_num) { auto& device_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); const bool& is_bidirec = context.Attr("is_bidirec"); @@ -1425,7 +1425,7 @@ struct BidirGradLayer : GradLayer { // split the output two tensor to output_forward, output_backward auto& device_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); std::vector output_vec; @@ -1675,7 +1675,7 @@ struct GRUGradCell : GradCell { backup_tensor(context, &grad_pre_hidden_bak, grad_pre_hidden); } // zero pre_hidden - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, grad_pre_hidden, static_cast(0.0)); math::GRUMetaValue gru_value; math::GRUMetaGrad gru_grad; diff --git a/paddle/fluid/operators/roi_align_op.cu b/paddle/fluid/operators/roi_align_op.cu index 520023229fe1b..5c9c8b78a4bdd 100644 --- a/paddle/fluid/operators/roi_align_op.cu +++ b/paddle/fluid/operators/roi_align_op.cu @@ -395,7 +395,7 @@ class 
GPUROIAlignGradOpKernel : public framework::OpKernel { memory::Copy(gplace, roi_id_data, cplace, roi_batch_id_data, bytes, dev_ctx.stream()); in_grad->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, in_grad, static_cast(0)); int output_grad_size = out_grad->numel(); diff --git a/paddle/fluid/operators/roi_align_op.h b/paddle/fluid/operators/roi_align_op.h index 1ab5ddc83fb67..acae86bd1b382 100644 --- a/paddle/fluid/operators/roi_align_op.h +++ b/paddle/fluid/operators/roi_align_op.h @@ -15,7 +15,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -23,7 +23,7 @@ namespace operators { using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; -namespace { +namespace { // NOLINT constexpr size_t get_offset(size_t x, size_t y, size_t width) { return y * width + x; } @@ -41,7 +41,7 @@ struct offsets_and_ratios { xy_ratio(xy_ratio), xY_ratio(xY_ratio), Xy_ratio(Xy_ratio), - XY_ratio(XY_ratio){}; + XY_ratio(XY_ratio) {} std::size_t xy = 0; std::size_t xY = 0; @@ -128,10 +128,10 @@ std::vector> get_indexes_and_ratios( } } return interpolation_cords; -} +} // namespace template -void interpolate(std::vector& interpolated_values, +void interpolate(std::vector& interpolated_values, // NOLINT const std::vector>& interpolation_cords, const T* data) { for (auto& ic : interpolation_cords) { @@ -167,7 +167,7 @@ void avg_pool(const std::vector& interpolated_values, T* output_data, output_data[i] = sum * count; } } -} +} // NOLINT template void bilinear_interpolate_gradient(const int height, const int width, T y, T x, @@ -389,7 +389,7 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel { } in_grad->mutable_data(ctx.GetPlace()); auto& dev_ctx = ctx.template device_context(); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, in_grad, static_cast(0)); int output_grad_size = out_grad->numel(); diff --git a/paddle/fluid/operators/roi_align_op_npu.cc b/paddle/fluid/operators/roi_align_op_npu.cc index d6ccf84bbfb3e..7e19287d42565 100644 --- a/paddle/fluid/operators/roi_align_op_npu.cc +++ b/paddle/fluid/operators/roi_align_op_npu.cc @@ -10,8 +10,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/roi_align_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/roi_pool_op.cu b/paddle/fluid/operators/roi_pool_op.cu index 16a8e2bf586a7..eafb790285181 100644 --- a/paddle/fluid/operators/roi_pool_op.cu +++ b/paddle/fluid/operators/roi_pool_op.cu @@ -274,7 +274,7 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel { dev_ctx.stream()); x_grad->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, x_grad, static_cast(0)); int output_grad_size = out_grad->numel(); diff --git a/paddle/fluid/operators/roi_pool_op.h b/paddle/fluid/operators/roi_pool_op.h index 40de6d0cf6abb..531fe241c4372 100644 --- a/paddle/fluid/operators/roi_pool_op.h +++ b/paddle/fluid/operators/roi_pool_op.h @@ -18,7 +18,7 @@ limitations under the License. 
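
// The offsets_and_ratios records precomputed above cache, per sample point, the
// four neighbor offsets plus bilinear weights. A standalone sketch of how such
// weights are conventionally derived (names here are illustrative, and the
// clamping convention is an assumption, not lifted from this patch):
#include <algorithm>
#include <cmath>
#include <cstdio>

struct BilinearSample {
  int x0, y0, x1, y1;        // the four neighbor corners
  float w00, w01, w10, w11;  // their interpolation weights, summing to 1
};

BilinearSample MakeSample(float x, float y, int width, int height) {
  BilinearSample s;
  s.x0 = static_cast<int>(std::floor(x));
  s.y0 = static_cast<int>(std::floor(y));
  s.x1 = std::min(s.x0 + 1, width - 1);
  s.y1 = std::min(s.y0 + 1, height - 1);
  float lx = x - s.x0, ly = y - s.y0;
  // Each corner is weighted by the area of the opposite sub-rectangle.
  s.w00 = (1 - lx) * (1 - ly);
  s.w01 = lx * (1 - ly);
  s.w10 = (1 - lx) * ly;
  s.w11 = lx * ly;
  return s;
}

int main() {
  BilinearSample s = MakeSample(2.25f, 3.5f, 8, 8);
  std::printf("%f %f %f %f\n", s.w00, s.w01, s.w10, s.w11);
}
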
*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -212,7 +212,7 @@ class CPUROIPoolGradOpKernel : public framework::OpKernel { const T* out_grad_data = out_grad->data(); const int64_t* argmax_data = argmax->data(); T* in_grad_data = in_grad->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(ctx.template device_context(), in_grad, static_cast(0)); diff --git a/paddle/fluid/operators/row_conv_op.cu b/paddle/fluid/operators/row_conv_op.cu index 586cf3239b575..24f8ba4f21327 100644 --- a/paddle/fluid/operators/row_conv_op.cu +++ b/paddle/fluid/operators/row_conv_op.cu @@ -11,9 +11,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/row_conv_op.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -395,7 +395,7 @@ class RowConvGradKernel size_t *idx = batch_indices.CUDAMutableData(context.GetPlace()); auto &device_ctx = context.cuda_device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; if (dFilter) { T *dfilter = dFilter->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/sample_logits_op.cu b/paddle/fluid/operators/sample_logits_op.cu index 4bcd27036a530..3caa79a0bff9a 100644 --- a/paddle/fluid/operators/sample_logits_op.cu +++ b/paddle/fluid/operators/sample_logits_op.cu @@ -19,10 +19,10 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sample_prob.h" #include "paddle/fluid/operators/math/softmax.h" #include "paddle/fluid/operators/sample_logits_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -138,7 +138,7 @@ class SampleLogitsCUDAKernel : public framework::OpKernel { // UNDERSTAND: allocate memories for temporaries sampled_logits->mutable_data(samples_dim, context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, sampled_logits, static_cast(0)); auto sampled_labels_data = @@ -224,7 +224,7 @@ class SampleLogitsGradCUDAKernel : public framework::OpKernel { logits_grad->mutable_data(context.GetPlace()); auto& dev_ctx = context.cuda_device_context(); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, logits_grad, static_cast(0)); // UNDERSTAND: scatter it back to logit_grad diff --git a/paddle/fluid/operators/sample_logits_op.h b/paddle/fluid/operators/sample_logits_op.h index 872eb341d49d5..f7560991a6a7c 100644 --- a/paddle/fluid/operators/sample_logits_op.h +++ b/paddle/fluid/operators/sample_logits_op.h @@ -19,9 +19,9 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sample_prob.h" #include "paddle/fluid/operators/math/softmax.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -286,7 +286,7 @@ class SampleLogitsGradKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, logits_grad, static_cast(0)); // UNDERSTAND: scatter it back to logit_grad diff --git a/paddle/fluid/operators/scatter.cu.h b/paddle/fluid/operators/scatter.cu.h index 13c08aea68849..a98d98e72adc5 100644 --- a/paddle/fluid/operators/scatter.cu.h +++ b/paddle/fluid/operators/scatter.cu.h @@ -15,11 +15,11 @@ limitations under the License. */ #pragma once #include #include -#include "math/math_function.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/place.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/search_compute.h b/paddle/fluid/operators/search_compute.h index d0618bf2c302b..3e8d270ca4f06 100644 --- a/paddle/fluid/operators/search_compute.h +++ b/paddle/fluid/operators/search_compute.h @@ -23,7 +23,7 @@ limitations under the License. */ #include #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/seed_op.cu b/paddle/fluid/operators/seed_op.cu index 5a8d1c067c3f2..5257e7709f91f 100644 --- a/paddle/fluid/operators/seed_op.cu +++ b/paddle/fluid/operators/seed_op.cu @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/seed_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -32,7 +32,7 @@ class GPUSeedKernel : public framework::OpKernel { platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(platform::CPUPlace()); out->mutable_data(platform::CPUPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; functor(reinterpret_cast(dev_ctx), out, static_cast(seed)); } else { diff --git a/paddle/fluid/operators/segment_pool_op.h b/paddle/fluid/operators/segment_pool_op.h index 4f180a31ce518..47b18e04e4dcc 100644 --- a/paddle/fluid/operators/segment_pool_op.h +++ b/paddle/fluid/operators/segment_pool_op.h @@ -16,10 +16,10 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/segment_pooling.h" #include "paddle/fluid/platform/macros.h" #include "paddle/pten/common/place.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -60,7 +60,7 @@ void SegmentKernelLaunchHelper(const framework::ExecutionContext& context) { "Segment ids must be >= 0, but got last id %d", dims[0])); output->Resize({dims}); output->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); set_zero(dev_ctx, output, static_cast(0)); } @@ -98,7 +98,7 @@ void SegmentKernelLaunchHelper(const framework::ExecutionContext& context) { } else if (pooltype == "MIN") { init_value = static_cast(FLT_MAX); } - math::SetConstant setconst; + pten::funcs::SetConstant setconst; auto& dev_ctx = context.template device_context(); setconst(dev_ctx, output, static_cast(init_value)); // the gpu kernel of mean pool record the counts of segment_ids @@ -152,7 +152,7 @@ class SegmentPoolGradKernel : public framework::OpKernel { } in_g->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); set_zero(dev_ctx, in_g, static_cast(0)); diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h index f73b1804199c2..b43254f91fde7 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/context_project.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -64,7 +64,7 @@ class SequenceConvKernel : public framework::OpKernel { Tensor col; col.mutable_data(col_shape, context.GetPlace()); // Because if padding_trainable is false, padding data should be zeros. - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); auto blas = math::GetBlas(dev_ctx); set_zero(dev_ctx, &col, static_cast(0)); @@ -107,7 +107,7 @@ class SequenceConvGradKernel : public framework::OpKernel { int down_pad = std::max(0, context_start + context_length - 1); auto sequence_width = static_cast(in->dims()[1]); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); auto blas = math::GetBlas(dev_ctx); // use col_shape in the im2col calculation diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h index 1186ed891e8c0..74baf67f7fe67 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h @@ -17,7 +17,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -172,7 +172,7 @@ struct SequenceExpandGradFunctor { int dout_end = dout_offset + repeat_num * x_seq_len; auto dout_sub = dout.Slice(dout_offset, dout_end); dout_sub.Resize({repeat_num, dx_sub.dims()[0]}); - math::ColwiseSum col_sum; + pten::funcs::ColwiseSum col_sum; col_sum(context, dout_sub, &dx_sub); dout_offset += repeat_num * x_seq_len; } @@ -194,7 +194,7 @@ class SequenceExpandGradKernel : public framework::OpKernel { g_x->set_lod(x->lod()); auto& dev_ctx = context.template device_context(); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, g_x, static_cast(0)); auto& y_lod = y->lod(); diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h index a9660f05c3c6b..2b50995a6abb4 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence_padding.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h index dca65512e32bc..bc279f1eb3110 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence_pooling.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h index b5d212421135b..2cf81197f92ce 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h @@ -14,7 +14,7 @@ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h b/paddle/fluid/operators/sequence_ops/sequence_slice_op.h index 65e021b507a87..d5689091bec2b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_slice_op.h @@ -14,8 +14,8 @@ limitations under the License. 
*/ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/strided_memcpy.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -168,7 +168,7 @@ class SequenceSliceGradOpKernel : public framework::OpKernel { if (x_grad) { x_grad->mutable_data(ctx.GetPlace()); x_grad->set_lod(in->lod()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(ctx.template device_context(), x_grad, static_cast(0)); diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc index 46e4196585bc8..869bc613c4ad2 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/softmax.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h index e8e0241e46ad2..5190108acdee5 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h @@ -21,7 +21,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -196,7 +196,7 @@ class SequenceTopkAvgPoolingGradKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, d_in, static_cast(0.0)); auto din_data = d_in->data(); diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h index 60ba4797db1e2..b85b938428288 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h @@ -17,8 +17,8 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence_padding.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -87,7 +87,7 @@ class SequenceUnpadGradOpKernel : public framework::OpKernel { LoDTensor zero_pads; zero_pads.Resize({1, 1}); zero_pads.mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); set_zero(dev_ctx, &zero_pads, static_cast(0)); diff --git a/paddle/fluid/operators/set_value_op.h b/paddle/fluid/operators/set_value_op.h index 1580ef140ada1..633bc468dc44e 100644 --- a/paddle/fluid/operators/set_value_op.h +++ b/paddle/fluid/operators/set_value_op.h @@ -437,7 +437,7 @@ class SetValueGradKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); auto& place = *context.template device_context().eigen_device(); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; if (grad_input) { // Set gradient of `Input` diff --git a/paddle/fluid/operators/shrink_rnn_memory_op.cc b/paddle/fluid/operators/shrink_rnn_memory_op.cc index 493073fadc2bd..38721e5e3e5bd 100644 --- a/paddle/fluid/operators/shrink_rnn_memory_op.cc +++ b/paddle/fluid/operators/shrink_rnn_memory_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/array_operator.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/pten/core/lod_utils.h" @@ -156,7 +156,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { auto &dev_ctx = *pool.Get(place); if (dout_var == nullptr) { // dx_tensor fill zero - math::set_constant(dev_ctx, &dx_tensor, 0.0f); + pten::funcs::set_constant(dev_ctx, &dx_tensor, 0.0f); } else { auto &dout_tensor = dout_var->Get(); auto height = dout_tensor.dims()[0]; @@ -165,7 +165,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { if (dx_tensor.dims()[0] > height) { auto rest_tensor = dx_tensor.Slice( static_cast(height), static_cast(dx_tensor.dims()[0])); - math::set_constant(dev_ctx, &rest_tensor, 0.0f); + pten::funcs::set_constant(dev_ctx, &rest_tensor, 0.0f); } } dx_tensor.set_lod(x_tensor.lod()); diff --git a/paddle/fluid/operators/shuffle_channel_op.h b/paddle/fluid/operators/shuffle_channel_op.h index 3ce1e0c770bb3..2bf96fad26993 100644 --- a/paddle/fluid/operators/shuffle_channel_op.h +++ b/paddle/fluid/operators/shuffle_channel_op.h @@ -13,7 +13,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/slice_op.h b/paddle/fluid/operators/slice_op.h index d9ef45343d83b..bf05bbadcbc02 100644 --- a/paddle/fluid/operators/slice_op.h +++ b/paddle/fluid/operators/slice_op.h @@ -18,9 +18,9 @@ limitations under the License. 
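
// ShrinkRNNMemoryGradOp above copies dout's first `height` rows into dx and
// zero-fills the tail slice with pten::funcs::set_constant. A standalone model
// of that Slice + set_constant pair (names invented for illustration):
#include <algorithm>
#include <cstdio>
#include <vector>

void ShrinkGradModel(const std::vector<float>& dout, int height,
                     std::vector<float>* dx, int dx_rows, int width) {
  // rows [0, height) come from dout; rows [height, dx_rows) become zero
  std::copy(dout.begin(), dout.begin() + height * width, dx->begin());
  std::fill(dx->begin() + height * width, dx->begin() + dx_rows * width, 0.0f);
}

int main() {
  std::vector<float> dout = {1, 2, 3, 4};    // 2 rows of width 2
  std::vector<float> dx(6, -1.0f);           // 3 rows of width 2
  ShrinkGradModel(dout, 2, &dx, 3, 2);
  for (float v : dx) std::printf("%g ", v);  // 1 2 3 4 0 0
  std::printf("\n");
}
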
*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/eigen/eigen_function.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/slice_utils.h" #include "paddle/fluid/operators/utils.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -299,7 +299,7 @@ class SliceGradKernel : public framework::OpKernel { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto& dev_ctx = *pool.Get(ctx.GetPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; for (int i = 0; i < d_in_size; ++i) { auto dim = input_array->at(i).dims(); d_in_arr->at(i).Resize(dim); diff --git a/paddle/fluid/operators/softmax_op_npu_test.cc b/paddle/fluid/operators/softmax_op_npu_test.cc index 8e9e077b845ce..98a67bc74871e 100644 --- a/paddle/fluid/operators/softmax_op_npu_test.cc +++ b/paddle/fluid/operators/softmax_op_npu_test.cc @@ -23,12 +23,11 @@ limitations under the License. */ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(softmax); USE_OP_DEVICE_KERNEL(softmax, NPU); diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu index fe025641330c3..33bbed0f69756 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu @@ -17,12 +17,12 @@ namespace cub = hipcub; #endif #include "paddle/fluid/operators/amp/fp16_type_traits.h" #include "paddle/fluid/operators/math/cross_entropy.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/softmax_cudnn_op.cu.h" #include "paddle/fluid/operators/softmax_with_cross_entropy_op.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -960,7 +960,7 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel { softmax_out->template mutable_data(context.GetPlace()); auto* loss_data = loss->template mutable_data(context.GetPlace()); - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; set_constant(context.cuda_device_context(), loss, static_cast(0)); if (axis_dim == 1) { set_constant(context.cuda_device_context(), softmax_out, @@ -1045,7 +1045,7 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel { auto* loss_data = loss->template mutable_data(context.GetPlace()); if (axis_dim == 1) { - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; set_constant(context.cuda_device_context(), softmax, static_cast(1)); set_constant(context.cuda_device_context(), loss, static_cast(0)); return; diff --git a/paddle/fluid/operators/solve_op.h b/paddle/fluid/operators/solve_op.h index 7893b5da12c47..c023d33a444cf 100644 --- a/paddle/fluid/operators/solve_op.h +++ b/paddle/fluid/operators/solve_op.h @@ -21,10 +21,10 @@ limitations under the License. 
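
// SliceGradKernel above materializes d(input) by zero-filling it with
// SetConstant and then writing d(out) back into the sliced window. A 1-D
// standalone model of that pad-with-zeros step (name invented for
// illustration):
#include <cstdio>
#include <vector>

std::vector<float> SliceGradModel(const std::vector<float>& d_out,
                                  int start, int in_size) {
  std::vector<float> d_in(in_size, 0.0f);  // the SetConstant(..., 0) analogue
  for (size_t i = 0; i < d_out.size(); ++i) d_in[start + i] = d_out[i];
  return d_in;
}

int main() {
  // A slice [2, 4) of a length-6 input scatters its gradient back as 0 0 7 8 0 0.
  for (float v : SliceGradModel({7, 8}, 2, 6)) std::printf("%g ", v);
  std::printf("\n");
}
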
*/ #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/matrix_solve.h" #include "paddle/fluid/operators/reduce_ops/reduce_sum_op.h" #include "paddle/fluid/operators/squeeze_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" #if defined(__NVCC__) || defined(__HIPCC__) #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h" #endif @@ -509,7 +509,7 @@ class SolveGradKernel : public framework::OpKernel { const auto& new_dims_vec = getNewDimsVec(input->dims()); tmp_input.Resize(framework::make_ddim(new_dims_vec)); tmp_input.mutable_data(ctx.GetPlace()); - math::TransposeNormal trans; + pten::funcs::TransposeNormal trans; std::vector new_axis = getNewAxis(input->dims().size()); auto& dev_ctx = ctx.template device_context(); trans(dev_ctx, *input, &tmp_input, new_axis); diff --git a/paddle/fluid/operators/spectral_norm_op.h b/paddle/fluid/operators/spectral_norm_op.h index b8a15579e5345..d0edcc169255e 100644 --- a/paddle/fluid/operators/spectral_norm_op.h +++ b/paddle/fluid/operators/spectral_norm_op.h @@ -14,7 +14,7 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -40,19 +40,19 @@ static inline void TransCompute(const int rank, const Tensor& in, Tensor* out, switch (rank) { case 2: - math::Transpose trans2; + pten::funcs::Transpose trans2; trans2(dev_ctx, in, out, perm); break; case 3: - math::Transpose trans3; + pten::funcs::Transpose trans3; trans3(dev_ctx, in, out, perm); break; case 4: - math::Transpose trans4; + pten::funcs::Transpose trans4; trans4(dev_ctx, in, out, perm); break; case 5: - math::Transpose trans5; + pten::funcs::Transpose trans5; trans5(dev_ctx, in, out, perm); break; default: diff --git a/paddle/fluid/operators/spp_op.h b/paddle/fluid/operators/spp_op.h index 6f78b88573404..755cca99dad42 100644 --- a/paddle/fluid/operators/spp_op.h +++ b/paddle/fluid/operators/spp_op.h @@ -16,9 +16,9 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" #include "paddle/fluid/operators/strided_memcpy.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -95,7 +95,7 @@ class SppGradKernel : public framework::OpKernel { std::string pooling_type = context.template Attr("pooling_type"); auto& device_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; in_x_grad->mutable_data(context.GetPlace()); zero(device_ctx, in_x_grad, static_cast(0)); auto out_stride = framework::stride(out->dims()); diff --git a/paddle/fluid/operators/squeeze_op.h b/paddle/fluid/operators/squeeze_op.h old mode 100755 new mode 100644 index 2f621c11e58f6..d86037fa03258 --- a/paddle/fluid/operators/squeeze_op.h +++ b/paddle/fluid/operators/squeeze_op.h @@ -18,9 +18,9 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/squeeze_op_npu_test.cc b/paddle/fluid/operators/squeeze_op_npu_test.cc index 3f6c43d7af2fe..ecedc0ba1c294 100644 --- a/paddle/fluid/operators/squeeze_op_npu_test.cc +++ b/paddle/fluid/operators/squeeze_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(squeeze); USE_OP_DEVICE_KERNEL(squeeze, NPU); diff --git a/paddle/fluid/operators/strided_slice_op.h b/paddle/fluid/operators/strided_slice_op.h index 47714ebb806e9..d1efd3b675192 100644 --- a/paddle/fluid/operators/strided_slice_op.h +++ b/paddle/fluid/operators/strided_slice_op.h @@ -18,8 +18,8 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/slice_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -615,7 +615,7 @@ class StridedSliceGradKernel : public framework::OpKernel { d_out_tensor->mutable_data(context.GetPlace()); } - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, d_out_tensor, static_cast(0)); } } @@ -628,7 +628,7 @@ class StridedSliceGradKernel : public framework::OpKernel { d_out->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, d_out, static_cast(0)); auto in_dims = d_input->dims(); diff --git a/paddle/fluid/operators/sum_op.cu b/paddle/fluid/operators/sum_op.cu index 9de9b0b6338df..ce152f4450811 100644 --- a/paddle/fluid/operators/sum_op.cu +++ b/paddle/fluid/operators/sum_op.cu @@ -134,7 +134,7 @@ void SumToLoDTensor(const framework::ExecutionContext &context) { int start = in_place ? 1 : 0; if (!in_place) { - math::SetConstant constant_functor; + pten::funcs::SetConstant constant_functor; constant_functor( context.template device_context(), out, static_cast(0)); diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h index 4e108b56a404d..d8d57b1f7f0a9 100644 --- a/paddle/fluid/operators/sum_op.h +++ b/paddle/fluid/operators/sum_op.h @@ -14,8 +14,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -167,7 +167,7 @@ class SumKernel : public framework::OpKernel { } if (start != 2) { VLOG(10) << "Fill with constant = 0 in sum kernel."; - math::SetConstant constant_functor; + pten::funcs::SetConstant constant_functor; constant_functor(context.template device_context(), out, static_cast(0)); } diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h index 48315980e3134..3a57a7b3e54cc 100644 --- a/paddle/fluid/operators/svd_helper.h +++ b/paddle/fluid/operators/svd_helper.h @@ -26,9 +26,9 @@ #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/complex_functors.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -232,11 +232,11 @@ static std::vector get_broadcast_batch_portion( return batchPortion; } -#define DITO_TRANSPOSE_RANK_CASE(N) \ - case N: { \ - math::Transpose trans; \ - trans(dev_ctx, x, &ret, axis); \ - break; \ +#define DITO_TRANSPOSE_RANK_CASE(N) \ + case N: { \ + pten::funcs::Transpose trans; \ + trans(dev_ctx, x, &ret, axis); \ + break; \ } #define DITO_SLICE_RANK_CASE(N) \ @@ -526,7 +526,7 @@ struct DeviceIndependenceTensorOperations { ret.Resize(framework::make_ddim(shape)); ret.mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); - SetConstant()(dev_ctx, &ret, T(fill_value)); + pten::funcs::SetConstant()(dev_ctx, &ret, T(fill_value)); return ret; } framework::Tensor Infinits(std::vector shape) { diff --git a/paddle/fluid/operators/take_along_axis_op.cu b/paddle/fluid/operators/take_along_axis_op.cu index e9f9b18718787..2d0ebbc20f215 100644 --- a/paddle/fluid/operators/take_along_axis_op.cu +++ b/paddle/fluid/operators/take_along_axis_op.cu @@ -63,7 +63,7 @@ class TakeAlongAxisGradOpCUDAKernel : public framework::OpKernel { // Set to zero tensor. auto &dev_ctx = ctx.template device_context(); - math::SetConstant functor; + pten::funcs::SetConstant functor; functor(reinterpret_cast(dev_ctx), input_grad, static_cast(0)); const auto &index_type = index->type(); diff --git a/paddle/fluid/operators/take_along_axis_op.h b/paddle/fluid/operators/take_along_axis_op.h index 580ca528ceb32..e7f804621b3f4 100644 --- a/paddle/fluid/operators/take_along_axis_op.h +++ b/paddle/fluid/operators/take_along_axis_op.h @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/gather_scatter_kernel.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -72,7 +72,7 @@ class TakeAlongAxisGradOpKernel : public framework::OpKernel { // Set to zero tensor. 
auto &dev_ctx = ctx.template device_context(); - math::SetConstant functor; + pten::funcs::SetConstant functor; functor(reinterpret_cast(dev_ctx), input_grad, static_cast(0)); diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc index 0e0a594846f27..62c07d0654fe0 100644 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc +++ b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/temporal_shift_op.h b/paddle/fluid/operators/temporal_shift_op.h index 05364b94c92c6..4b2aa098d0dd8 100644 --- a/paddle/fluid/operators/temporal_shift_op.h +++ b/paddle/fluid/operators/temporal_shift_op.h @@ -11,7 +11,7 @@ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/transpose_op.h b/paddle/fluid/operators/transpose_op.h index e4e5dfdba9f60..c873f845117df 100644 --- a/paddle/fluid/operators/transpose_op.h +++ b/paddle/fluid/operators/transpose_op.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -29,32 +29,32 @@ inline void TransCompute(const int dim, const DeviceContext& dev_ctx, const std::vector& axis) { switch (dim) { case 1: - math::Transpose trans1; + pten::funcs::Transpose trans1; trans1(dev_ctx, in, out, axis); break; case 2: - math::Transpose trans2; + pten::funcs::Transpose trans2; trans2(dev_ctx, in, out, axis); break; case 3: - math::Transpose trans3; + pten::funcs::Transpose trans3; trans3(dev_ctx, in, out, axis); break; case 4: - math::Transpose trans4; + pten::funcs::Transpose trans4; trans4(dev_ctx, in, out, axis); break; case 5: - math::Transpose trans5; + pten::funcs::Transpose trans5; trans5(dev_ctx, in, out, axis); break; case 6: - math::Transpose trans6; + pten::funcs::Transpose trans6; trans6(dev_ctx, in, out, axis); break; default: // for dim >= 7 situation - math::TransposeNormal trans_normal; + pten::funcs::TransposeNormal trans_normal; trans_normal(dev_ctx, in, out, axis); } } diff --git a/paddle/fluid/operators/transpose_op_npu_test.cc b/paddle/fluid/operators/transpose_op_npu_test.cc index 91923da819dc5..49aa265656ea2 100644 --- a/paddle/fluid/operators/transpose_op_npu_test.cc +++ b/paddle/fluid/operators/transpose_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. 
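
// TransCompute above turns a runtime rank into a compile-time one so the
// fixed-rank pten::funcs::Transpose (an Eigen shuffle under the hood) can be
// used, falling back to pten::funcs::TransposeNormal past rank 6. The dispatch
// shape, reduced to a standalone sketch with stub bodies:
#include <cstdio>

template <int Rank>
void TransposeFixedRank() {  // stands in for Transpose<DeviceContext, T, Rank>
  std::printf("fixed-rank path, rank %d\n", Rank);
}

void TransposeAnyRank(int rank) {  // stands in for TransposeNormal
  std::printf("normal path, rank %d\n", rank);
}

void TransComputeModel(int dim) {
  switch (dim) {
    case 1: TransposeFixedRank<1>(); break;
    case 2: TransposeFixedRank<2>(); break;
    case 3: TransposeFixedRank<3>(); break;
    case 4: TransposeFixedRank<4>(); break;
    case 5: TransposeFixedRank<5>(); break;
    case 6: TransposeFixedRank<6>(); break;
    default: TransposeAnyRank(dim);  // for dim >= 7
  }
}

int main() {
  TransComputeModel(4);
  TransComputeModel(8);
}
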
*/ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(transpose2); USE_OP_DEVICE_KERNEL(transpose2, NPU); diff --git a/paddle/fluid/operators/tree_conv_op.h b/paddle/fluid/operators/tree_conv_op.h index a84589b32fd00..c2a6cfdd0d37c 100644 --- a/paddle/fluid/operators/tree_conv_op.h +++ b/paddle/fluid/operators/tree_conv_op.h @@ -28,7 +28,7 @@ class TreeConvKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { math::Tree2ColFunctor tree2col; - math::SetConstant constant; + pten::funcs::SetConstant constant; auto *Edges = ctx.Input("EdgeSet"); auto *Embeddings = ctx.Input("NodesVector"); @@ -86,7 +86,7 @@ class TreeConvGradKernel : public framework::OpKernel { auto *Filter = ctx.Input("Filter"); math::Tree2ColFunctor tree2col; math::Col2TreeFunctor col2tree; - math::SetConstant constant; + pten::funcs::SetConstant constant; auto &dev_ctx = ctx.template device_context(); auto blas = math::GetBlas(dev_ctx); diff --git a/paddle/fluid/operators/unfold_op.h b/paddle/fluid/operators/unfold_op.h index 006e4822fead0..5107b5cc4926f 100644 --- a/paddle/fluid/operators/unfold_op.h +++ b/paddle/fluid/operators/unfold_op.h @@ -19,7 +19,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/im2col.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -106,7 +106,7 @@ class UnfoldGradOpKernel : public framework::OpKernel { math::Col2ImFunctor col2im; auto& dev_ctx = ctx.template device_context(); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, input_grad, static_cast(0)); for (int i = 0; i < batch_size; i++) { Tensor out_grad_batch = diff --git a/paddle/fluid/operators/unique_consecutive_op.h b/paddle/fluid/operators/unique_consecutive_op.h index e6cb5dafe3433..9b933dfd92f55 100644 --- a/paddle/fluid/operators/unique_consecutive_op.h +++ b/paddle/fluid/operators/unique_consecutive_op.h @@ -22,9 +22,9 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/operators/unique_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/unique_op.h b/paddle/fluid/operators/unique_op.h index 66b0543771f4d..c3d291d1201c6 100644 --- a/paddle/fluid/operators/unique_op.h +++ b/paddle/fluid/operators/unique_op.h @@ -22,8 +22,8 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/transpose_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/unique_with_counts_op.h b/paddle/fluid/operators/unique_with_counts_op.h index f61bac7cda003..fc3568ff181d8 100644 --- a/paddle/fluid/operators/unique_with_counts_op.h +++ b/paddle/fluid/operators/unique_with_counts_op.h @@ -18,8 +18,8 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/unique_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/unpool_op.h b/paddle/fluid/operators/unpool_op.h index 52849cb3e0f8e..95aa1a4688b5d 100644 --- a/paddle/fluid/operators/unpool_op.h +++ b/paddle/fluid/operators/unpool_op.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/unpooling.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -36,7 +36,7 @@ class UnpoolKernel : public framework::OpKernel { T* output_data = out->mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); if (output_data) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, out, static_cast(0)); } math::Unpool2dMaxFunctor unpool2d_max_forward; @@ -60,7 +60,7 @@ class UnpoolGradKernel : public framework::OpKernel { std::vector paddings = context.Attr>("paddings"); auto& device_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; in_x_grad->mutable_data(context.GetPlace()); zero(device_ctx, in_x_grad, static_cast(0)); @@ -84,7 +84,7 @@ class Unpool3dKernel : public framework::OpKernel { T* output_data = out->mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); if (output_data) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, out, static_cast(0)); } math::Unpool3dMaxFunctor unpool3d_max_forward; @@ -109,7 +109,7 @@ class Unpool3dGradKernel : public framework::OpKernel { std::vector paddings = context.Attr>("paddings"); auto& device_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; in_x_grad->mutable_data(context.GetPlace()); zero(device_ctx, in_x_grad, static_cast(0)); diff --git a/paddle/fluid/operators/unsqueeze_op.h b/paddle/fluid/operators/unsqueeze_op.h index d7a1e0ed3b843..649cc9de50e0d 100644 --- a/paddle/fluid/operators/unsqueeze_op.h +++ b/paddle/fluid/operators/unsqueeze_op.h @@ -17,10 +17,10 @@ limitations under the License. 
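
// UnpoolKernel above zero-fills the output with SetConstant before
// Unpool2dMaxFunctor scatters each pooled value to its recorded argmax
// position; every other slot must stay zero. A 1-D standalone model (name
// invented for illustration):
#include <cstdio>
#include <vector>

std::vector<float> UnpoolMaxModel(const std::vector<float>& in,
                                  const std::vector<int>& argmax,
                                  int out_size) {
  std::vector<float> out(out_size, 0.0f);  // the SetConstant step
  for (size_t i = 0; i < in.size(); ++i) out[argmax[i]] = in[i];
  return out;
}

int main() {
  // Pooled values 9 and 7 return to their argmax slots 1 and 4: 0 9 0 0 7 0.
  for (float v : UnpoolMaxModel({9, 7}, {1, 4}, 6)) std::printf("%g ", v);
  std::printf("\n");
}
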
*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/unsqueeze_op_npu_test.cc b/paddle/fluid/operators/unsqueeze_op_npu_test.cc index cf96ef57a4df0..c34cdbc2e79f7 100644 --- a/paddle/fluid/operators/unsqueeze_op_npu_test.cc +++ b/paddle/fluid/operators/unsqueeze_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(unsqueeze); USE_OP_DEVICE_KERNEL(unsqueeze, NPU); diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index db8b2c30501bd..f67b969d4590a 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -16,8 +16,8 @@ limitations under the License. */ #include #include #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/dynload/mklml.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/viterbi_decode_op.h b/paddle/fluid/operators/viterbi_decode_op.h index ab95dbc763a5e..77e38f4fa8585 100644 --- a/paddle/fluid/operators/viterbi_decode_op.h +++ b/paddle/fluid/operators/viterbi_decode_op.h @@ -250,8 +250,8 @@ class ViterbiDecodeKernel : public framework::OpKernel { auto batch_size = static_cast(input->dims()[0]); auto seq_len = static_cast(input->dims()[1]); auto n_labels = static_cast(input->dims()[2]); - math::SetConstant float_functor; - math::SetConstant int_functor; + pten::funcs::SetConstant float_functor; + pten::funcs::SetConstant int_functor; std::vector historys; // We create tensor buffer in order to avoid allocating memory frequently // 10 means allocate 10*batch_size bytes memory, such as int_mask, zero... diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h index 56f1d8d97ba61..3f8c38aa60127 100644 --- a/paddle/fluid/operators/warpctc_op.h +++ b/paddle/fluid/operators/warpctc_op.h @@ -17,10 +17,10 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence_padding.h" #include "paddle/fluid/operators/math/sequence_scale.h" #include "paddle/fluid/platform/dynload/warpctc.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -138,7 +138,7 @@ class WarpCTCFunctor { framework::make_ddim({static_cast(workspace_elements)}), dev_ctx); T* workspace_data = workspace.data(); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), &workspace, static_cast(0)); @@ -334,7 +334,7 @@ class WarpCTCKernel : public framework::OpKernel { T* warpctc_grad_data = warpctc_grad->mutable_data(warpctc_logits.dims(), ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), warpctc_grad, static_cast(0)); diff --git a/paddle/fluid/operators/where_index_op.h b/paddle/fluid/operators/where_index_op.h index 97a7bb939b971..c6828a7876831 100644 --- a/paddle/fluid/operators/where_index_op.h +++ b/paddle/fluid/operators/where_index_op.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/where_op.h b/paddle/fluid/operators/where_op.h index fdb65858eff50..415632f3d7e76 100644 --- a/paddle/fluid/operators/where_op.h +++ b/paddle/fluid/operators/where_op.h @@ -14,7 +14,7 @@ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/pten/kernels/cpu/norm_grad_kernel.cc b/paddle/pten/kernels/cpu/norm_grad_kernel.cc index 3357e6f76fa56..7b2a07c37bc71 100644 --- a/paddle/pten/kernels/cpu/norm_grad_kernel.cc +++ b/paddle/pten/kernels/cpu/norm_grad_kernel.cc @@ -13,8 +13,8 @@ // limitations under the License. #include "paddle/pten/kernels/norm_grad_kernel.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/kernels/funcs/eigen/eigen_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/pten/kernels/funcs/eigen/common.h" diff --git a/paddle/pten/kernels/cpu/norm_kernel.cc b/paddle/pten/kernels/cpu/norm_kernel.cc index ef2cf405c13b5..f2996faccb1f8 100644 --- a/paddle/pten/kernels/cpu/norm_kernel.cc +++ b/paddle/pten/kernels/cpu/norm_kernel.cc @@ -13,11 +13,11 @@ // limitations under the License. 
#include "paddle/pten/kernels/norm_kernel.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/kernels/funcs/common_shape.h" #include "paddle/pten/kernels/funcs/eigen/eigen_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace pten { diff --git a/paddle/pten/kernels/funcs/CMakeLists.txt b/paddle/pten/kernels/funcs/CMakeLists.txt index 32bdc94b95d52..e4dd437629a9b 100644 --- a/paddle/pten/kernels/funcs/CMakeLists.txt +++ b/paddle/pten/kernels/funcs/CMakeLists.txt @@ -6,3 +6,51 @@ if(WITH_GPU) elseif(WITH_ROCM) hip_library(pten_transpose_gpu SRCS transpose.cu DEPS dense_tensor malloc pten_context) endif() + +function(math_library TARGET) + # math_library is a function to create math library. + # The interface is the same as cc_library. + # But it handle split GPU/CPU code and link some common library. + set(cc_srcs) + set(cu_srcs) + set(hip_srcs) + set(math_common_deps device_context framework_proto enforce) + if (WITH_GPU) + if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) + list(APPEND math_common_deps cub) + else() + list(APPEND math_common_deps) + endif() + endif() + set(multiValueArgs DEPS) + cmake_parse_arguments(math_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) + list(APPEND cc_srcs ${TARGET}.cc) + endif() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) + list(APPEND cu_srcs ${TARGET}.cu) + endif() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc) + list(APPEND cu_srcs ${TARGET}.cu.cc) + endif() + + list(LENGTH cc_srcs cc_srcs_len) + if (WITH_GPU) + nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) + elseif (WITH_ROCM) + hip_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) + elseif(${cc_srcs_len} GREATER 0) + cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) + endif() +endfunction() + +math_library(math_function DEPS blas dense_tensor tensor) +cc_test(math_function_test SRCS math_function_test.cc DEPS math_function) +if(WITH_GPU) + nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function) +endif() +if(WITH_ROCM) + hip_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor) +endif() diff --git a/paddle/pten/kernels/funcs/elementwise_base.h b/paddle/pten/kernels/funcs/elementwise_base.h index 0f26f3d8aa661..2fcab4d667194 100644 --- a/paddle/pten/kernels/funcs/elementwise_base.h +++ b/paddle/pten/kernels/funcs/elementwise_base.h @@ -14,12 +14,12 @@ limitations under the License. 
*/ #pragma once -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/fluid/platform/transform.h" #include "paddle/pten/backends/all_context.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/kernels/empty_kernel.h" +#include "paddle/pten/kernels/funcs/math_function.h" #if defined(__NVCC__) || defined(__HIPCC__) #include "paddle/fluid/platform/aligned_vector.h" @@ -394,7 +394,7 @@ static inline void GetDoubleGradSafeTensor(const DeviceContext &dev_ctx, auto meta = pten::DenseTensorMeta(x.dtype(), x.dims(), x.layout()); *ddx_safe = pten::Empty(dev_ctx, std::move(meta)); ddx_safe->mutable_data(dev_ctx.GetPlace()); - paddle::operators::math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, ddx_safe, static_cast(0)); } } diff --git a/paddle/pten/kernels/funcs/math_function.cc b/paddle/pten/kernels/funcs/math_function.cc new file mode 100644 index 0000000000000..550ec23c18f3a --- /dev/null +++ b/paddle/pten/kernels/funcs/math_function.cc @@ -0,0 +1,342 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/kernels/funcs/math_function.h" + +#ifdef PADDLE_WITH_MKLML +#include "paddle/fluid/platform/dynload/mklml.h" +#endif + +#ifdef PADDLE_USE_OPENBLAS +#include +#endif + +#include +#include +#include +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/platform/bfloat16.h" +#include "paddle/fluid/platform/float16.h" +#include "paddle/pten/backends/cpu/cpu_context.h" +#include "paddle/pten/kernels/funcs/eigen/common.h" +#include "paddle/pten/kernels/funcs/math_function_impl.h" +#include "unsupported/Eigen/CXX11/Tensor" + +namespace pten { +namespace funcs { + +using float16 = paddle::platform::float16; + +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; + +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; + +#ifdef PADDLE_WITH_XPU +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; +#endif + +#define DEFINE_CPU_TRANS(RANK) \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + 
template struct Transpose<paddle::platform::CPUDeviceContext, int64_t, RANK>;  \
+  template struct Transpose<paddle::platform::CPUDeviceContext, bool, RANK>;    \
+  template struct Transpose<paddle::platform::CPUDeviceContext, int16_t, RANK>; \
+  template struct Transpose<paddle::platform::CPUDeviceContext, uint8_t, RANK>; \
+  template struct Transpose<paddle::platform::CPUDeviceContext, int8_t, RANK>;  \
+  template struct Transpose<paddle::platform::CPUDeviceContext,                 \
+                            paddle::platform::complex<float>,                   \
+                            RANK>;                                              \
+  template struct Transpose<paddle::platform::CPUDeviceContext,                 \
+                            paddle::platform::complex<double>,                  \
+                            RANK>;
+
+DEFINE_CPU_TRANS(1);
+DEFINE_CPU_TRANS(2);
+DEFINE_CPU_TRANS(3);
+DEFINE_CPU_TRANS(4);
+DEFINE_CPU_TRANS(5);
+DEFINE_CPU_TRANS(6);
+
+template <typename T>
+struct TransposeNormal<paddle::platform::CPUDeviceContext, T> {
+  void operator()(const paddle::platform::CPUDeviceContext& context,
+                  const paddle::framework::Tensor& in,
+                  paddle::framework::Tensor* out,
+                  const std::vector<int>& axis) {
+    const int rank = axis.size();
+    auto in_stride = paddle::framework::stride(in.dims());
+    auto out_stride = paddle::framework::stride(out->dims());
+    const T* in_ptr = in.data<T>();
+    T* out_ptr = out->data<T>();
+
+    auto transpose_helper = [&](int64_t beg, int64_t end) {
+      for (int64_t out_idx = beg; out_idx < end; ++out_idx) {
+        int64_t in_idx = 0;
+        int64_t tmp_idx = out_idx;
+        // calculate the input index
+        for (int i = 0; i < rank; ++i) {
+          const int64_t coordinate = tmp_idx / out_stride[i];
+          tmp_idx -= coordinate * out_stride[i];
+          in_idx += coordinate * in_stride[axis[i]];
+        }
+        out_ptr[out_idx] = in_ptr[in_idx];
+      }
+    };
+    transpose_helper(0, out->numel());
+  }
+};
+
+// define transpose normal
+#define DEFINE_CPU_TRANS_NORMAL(TYPE) \
+  template struct TransposeNormal<paddle::platform::CPUDeviceContext, TYPE>
+
+DEFINE_CPU_TRANS_NORMAL(paddle::platform::float16);
+DEFINE_CPU_TRANS_NORMAL(paddle::platform::bfloat16);
+DEFINE_CPU_TRANS_NORMAL(float);
+DEFINE_CPU_TRANS_NORMAL(double);
+DEFINE_CPU_TRANS_NORMAL(int);
+DEFINE_CPU_TRANS_NORMAL(int64_t);
+DEFINE_CPU_TRANS_NORMAL(bool);
+DEFINE_CPU_TRANS_NORMAL(int16_t);
+DEFINE_CPU_TRANS_NORMAL(uint8_t);
+DEFINE_CPU_TRANS_NORMAL(int8_t);
+DEFINE_CPU_TRANS_NORMAL(paddle::platform::complex<float>);
+DEFINE_CPU_TRANS_NORMAL(paddle::platform::complex<double>);
+
+struct TensorSetConstantCPU {
+  TensorSetConstantCPU(paddle::framework::Tensor* tensor, float value)
+      : tensor_(tensor), value_(value) {}
+  template <typename T>
+  void apply() const {
+    auto cpu = paddle::platform::CPUPlace();
+    auto* begin = tensor_->mutable_data<T>(cpu);
+    std::fill(begin, begin + tensor_->numel(), static_cast<T>(value_));
+  }
+  paddle::framework::Tensor* tensor_;
+  float value_;
+};
+
+template <>
+void set_constant_with_place<paddle::platform::XPUPlace>(
+    const paddle::platform::DeviceContext& context,
+    paddle::framework::Tensor* tensor,
+    float value) {
+  PADDLE_THROW(
+      paddle::platform::errors::Unimplemented("XPUPlace is not supported"));
+}
+
+template <>
+void set_constant_with_place<paddle::platform::NPUPlace>(
+    const paddle::platform::DeviceContext& context,
+    paddle::framework::Tensor* tensor,
+    float value) {
+  PADDLE_THROW(
+      paddle::platform::errors::Unimplemented("NPUPlace is not supported"));
+}
+
+template <>
+void set_constant_with_place<paddle::platform::NPUPinnedPlace>(
+    const paddle::platform::DeviceContext& context,
+    paddle::framework::Tensor* tensor,
+    float value) {
+  PADDLE_THROW(paddle::platform::errors::Unimplemented(
+      "NPUPinnedPlace is not supported"));
+}
+
+template <>
+void set_constant_with_place<paddle::platform::IPUPlace>(
+    const paddle::platform::DeviceContext& context,
+    paddle::framework::Tensor* tensor,
+    float value) {
+  PADDLE_THROW(
+      paddle::platform::errors::Unimplemented("IPUPlace is not supported"));
+}
+
+template <>
+void set_constant_with_place<paddle::platform::CPUPlace>(
+    const paddle::platform::DeviceContext& context,
+    paddle::framework::Tensor* tensor,
+    float value) {
+  paddle::framework::VisitDataType(tensor->type(),
+                                   TensorSetConstantCPU(tensor, value));
+}
+
+template <>
+void set_constant_with_place<paddle::platform::MLUPlace>(
+    const paddle::platform::DeviceContext& context,
paddle::framework::Tensor* tensor,
+    float value) {
+  PADDLE_THROW(
+      paddle::platform::errors::Unimplemented("MLUPlace is not supported"));
+}
+
+template <>
+void set_constant_with_place<paddle::platform::CUDAPinnedPlace>(
+    const paddle::platform::DeviceContext& context,
+    paddle::framework::Tensor* tensor,
+    float value) {
+  paddle::framework::VisitDataType(tensor->type(),
+                                   TensorSetConstantCPU(tensor, value));
+}
+
+struct TensorSetConstantWithPlace : public boost::static_visitor<void> {
+  TensorSetConstantWithPlace(const paddle::platform::DeviceContext& context,
+                             paddle::framework::Tensor* tensor,
+                             float value)
+      : context_(context), tensor_(tensor), value_(value) {}
+
+  template <typename Place>
+  void operator()(Place place) const {
+    set_constant_with_place<Place>(context_, tensor_, value_);
+  }
+
+  const paddle::platform::DeviceContext& context_;
+  paddle::framework::Tensor* tensor_;
+  float value_;
+};
+
+void set_constant(const paddle::platform::DeviceContext& context,
+                  paddle::framework::Tensor* tensor,
+                  float value) {
+  TensorSetConstantWithPlace func(context, tensor, value);
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  // tensor->place().apply_visitor(func);
+  paddle::platform::VisitPlace(tensor->place(), func);
+#else
+  func(paddle::platform::CPUPlace());
+#endif
+}
+
+template <typename T>
+struct RowwiseAdd<paddle::platform::CPUDeviceContext, T> {
+  void operator()(const paddle::platform::CPUDeviceContext& context,
+                  const paddle::framework::Tensor& input,
+                  const paddle::framework::Tensor& vector,
+                  paddle::framework::Tensor* output) {
+    auto in_dims = input.dims();
+    auto out_dims = output->dims();
+    auto size = input.numel() / in_dims[0];
+    PADDLE_ENFORCE_EQ(
+        vector.numel(),
+        size,
+        paddle::platform::errors::InvalidArgument(
+            "The input vector size"
+            " should be equal to the size of each row of input tensor."
+            " Expected vector size=%d, but received %d",
+            size,
+            vector.numel()));
+    const char* in_dims_cstr = in_dims.to_str().c_str();
+    const char* out_dims_cstr = out_dims.to_str().c_str();
+    PADDLE_ENFORCE_EQ(out_dims,
+                      in_dims,
+                      paddle::platform::errors::InvalidArgument(
+                          "The output tensor shape should be same as the input"
+                          " tensor shape. Expected output tensor shape: %s,"
+                          " but received %s",
+                          in_dims_cstr,
+                          out_dims_cstr));
+
+    auto in = paddle::framework::EigenMatrix<T>::From(input);
+    auto vec = paddle::framework::EigenVector<T>::Flatten(vector);
+    auto out = paddle::framework::EigenMatrix<T>::From(*output);
+
+    for (int64_t i = 0; i < in_dims[0]; ++i) {
+      out.chip(i, 0) = in.chip(i, 0) + vec;
+    }
+  }
+};
+
+template struct RowwiseAdd<paddle::platform::CPUDeviceContext, float>;
+template struct RowwiseAdd<paddle::platform::CPUDeviceContext, double>;
+
+template struct ColwiseSum<paddle::platform::CPUDeviceContext, float>;
+template struct ColwiseSum<paddle::platform::CPUDeviceContext, double>;
+template struct ColwiseSum<paddle::platform::CPUDeviceContext, int>;
+template struct ColwiseSum<paddle::platform::CPUDeviceContext, int64_t>;
+
+template struct RowwiseSum<paddle::platform::CPUDeviceContext, float>;
+template struct RowwiseSum<paddle::platform::CPUDeviceContext, double>;
+
+template struct RowwiseMean<paddle::platform::CPUDeviceContext, float>;
+template struct RowwiseMean<paddle::platform::CPUDeviceContext, double>;
+
+template <typename T>
+struct ElementwiseAddTo<paddle::platform::CPUDeviceContext, T> {
+  void operator()(paddle::platform::CPUDeviceContext* ctx,
+                  const paddle::framework::Tensor& src,
+                  paddle::framework::Tensor* dst) {
+    auto in = paddle::framework::EigenVector<T>::Flatten(src);
+    auto out = paddle::framework::EigenVector<T>::Flatten(*dst);
+    auto& place = *(ctx->eigen_device());
+    out.device(place) = out + in;
+  }
+};
+
+template struct ElementwiseAddTo<paddle::platform::CPUDeviceContext,
+                                 paddle::platform::float16>;
+
+}  // namespace funcs
+}  // namespace pten
diff --git a/paddle/pten/kernels/funcs/math_function.cu b/paddle/pten/kernels/funcs/math_function.cu
new file mode 100644
index 0000000000000..76bc5f806d3e8
--- /dev/null
+++ b/paddle/pten/kernels/funcs/math_function.cu
@@ -0,0 +1,380 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include +#include +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/memory/malloc.h" +#include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/platform/bfloat16.h" +#include "paddle/fluid/platform/float16.h" +#include "paddle/pten/backends/gpu/gpu_context.h" +#include "paddle/pten/kernels/funcs/eigen/common.h" +#include "paddle/pten/kernels/funcs/math_function.h" +#include "paddle/pten/kernels/funcs/math_function_impl.h" + +namespace pten { +namespace funcs { + +using float16 = paddle::platform::float16; +using bfloat16 = paddle::platform::bfloat16; + +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; + +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; + +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; + +#define DEFINE_GPU_TRANS(RANK) \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose, \ + RANK>; \ + template struct Transpose, \ + RANK>; + +DEFINE_GPU_TRANS(1); +DEFINE_GPU_TRANS(2); +DEFINE_GPU_TRANS(3); +DEFINE_GPU_TRANS(4); +DEFINE_GPU_TRANS(5); +DEFINE_GPU_TRANS(6); + +#define REINTERPRET(T, DST_PTR, SRC_PTR) \ + T* DST_PTR = reinterpret_cast(SRC_PTR) + +template +__global__ void TransposeNormalKernel(const T* in_ptr, + T* out_ptr, + int64_t element, + const int64_t* in_stride_ptr, + const int64_t* out_stride_ptr, + const int64_t* axis_ptr, + int rank) { + CUDA_KERNEL_LOOP(out_idx, element) { + int64_t in_idx = 0; + int64_t tmp_idx = out_idx; + for (int i = 0; i < rank; ++i) { + const int64_t coordinate = tmp_idx / out_stride_ptr[i]; + tmp_idx -= coordinate * out_stride_ptr[i]; + in_idx += coordinate * in_stride_ptr[axis_ptr[i]]; + } + out_ptr[out_idx] = in_ptr[in_idx]; + } +} + +template +struct TransposeNormal { + void operator()(const paddle::platform::CUDADeviceContext& context, + const paddle::framework::Tensor& in, + paddle::framework::Tensor* out, + const std::vector& axis) { + const int rank = axis.size(); + auto in_stride 
= paddle::framework::stride(in.dims()); + auto out_stride = paddle::framework::stride(out->dims()); + auto* in_ptr = in.data(); + auto* out_ptr = out->data(); + + // copy in_stride, out_stride, axis to gpu device + const paddle::platform::CUDAPlace& cuda_place = context.GetPlace(); + paddle::platform::CPUPlace cpu_place = paddle::platform::CPUPlace(); + size_t size = 3 * rank * sizeof(int64_t); + auto cpu_buf_holder = paddle::memory::Alloc(cpu_place, size); + auto cuda_buf_holder = paddle::memory::Alloc(cuda_place, size); + REINTERPRET(int64_t, cpu_buf, cpu_buf_holder->ptr()); + REINTERPRET(int64_t, cuda_buf, cuda_buf_holder->ptr()); + for (int i = 0; i < rank; ++i) { + cpu_buf[i] = in_stride[i]; + cpu_buf[rank + i] = out_stride[i]; + cpu_buf[2 * rank + i] = axis[i]; + } + paddle::memory::Copy( + cuda_place, cuda_buf, cpu_place, cpu_buf, size, context.stream()); + REINTERPRET(const int64_t, in_stride_ptr, cuda_buf); + REINTERPRET(const int64_t, out_stride_ptr, cuda_buf + rank); + REINTERPRET(const int64_t, axis_ptr, cuda_buf + 2 * rank); + + const int MAX_BLOCK_DIM = context.GetMaxThreadsPerBlock(); + const int MAX_GRID_DIM = + context.GetMaxPhysicalThreadCount() / MAX_BLOCK_DIM; + int64_t elements = in.numel(); + int block_size = (elements >= MAX_BLOCK_DIM) + ? MAX_BLOCK_DIM + : (1 << static_cast(std::log2(elements))); + int grid_size = elements / block_size; + grid_size = (grid_size >= MAX_GRID_DIM) ? MAX_GRID_DIM : grid_size; + TransposeNormalKernel<<>>( + in_ptr, + out_ptr, + elements, + in_stride_ptr, + out_stride_ptr, + axis_ptr, + rank); + } +}; + +// define transpose normal +#define DEFINE_GPU_TRANS_NORMAL(TYPE) \ + template struct TransposeNormal + +DEFINE_GPU_TRANS_NORMAL(float16); +DEFINE_GPU_TRANS_NORMAL(bfloat16); +DEFINE_GPU_TRANS_NORMAL(float); +DEFINE_GPU_TRANS_NORMAL(double); +DEFINE_GPU_TRANS_NORMAL(int); +DEFINE_GPU_TRANS_NORMAL(int64_t); +DEFINE_GPU_TRANS_NORMAL(bool); +DEFINE_GPU_TRANS_NORMAL(int16_t); +DEFINE_GPU_TRANS_NORMAL(uint8_t); +DEFINE_GPU_TRANS_NORMAL(int8_t); +DEFINE_GPU_TRANS_NORMAL(paddle::platform::complex); +DEFINE_GPU_TRANS_NORMAL(paddle::platform::complex); + +struct TensorSetConstantGPU { + TensorSetConstantGPU(const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) + : context_(context), tensor_(tensor), value_(value) {} + + template + void apply() const { + SetConstant functor; + functor( + reinterpret_cast(context_), + tensor_, + static_cast(value_)); + } + + const paddle::platform::DeviceContext& context_; + paddle::framework::Tensor* tensor_; + float value_; +}; + +template <> +void set_constant_with_place( + const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) { + paddle::framework::VisitDataType( + tensor->type(), TensorSetConstantGPU(context, tensor, value)); +} + +template +__global__ void RowwiseAddKernel( + const T* a, const T* b, T* c, int width, int num) { + T tmp = 1.0 / width; + CUDA_KERNEL_LOOP(i, num) { + int h = i * tmp; + int w = i - h * width; + c[i] = a[i] + b[w]; + } +} + +template +struct RowwiseAdd { + void operator()(const paddle::platform::CUDADeviceContext& context, + const paddle::framework::Tensor& input, + const paddle::framework::Tensor& vector, + paddle::framework::Tensor* output) { + auto in_dims = input.dims(); + auto out_dims = output->dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ( + vector.numel(), + size, + paddle::platform::errors::InvalidArgument( + "The input vector size" + " should be equal to the 
size of each row of input tensor." + " Expected vector size=%d, but received %d", + size, + vector.numel())); + const char* in_dims_cstr = in_dims.to_str().c_str(); + const char* out_dims_cstr = out_dims.to_str().c_str(); + PADDLE_ENFORCE_EQ( + out_dims, + in_dims, + paddle::platform::errors::InvalidArgument( + "The output tensor shape should be same as the input tensor" + " shape. Expected output tensor shape: %s," + " but received %s", + in_dims_cstr, + out_dims_cstr)); + int blocks = 512; + int grids = (input.numel() + blocks - 1) / blocks; + RowwiseAddKernel<<>>( + input.data(), + vector.data(), + output->data(), + static_cast(in_dims[1]), + static_cast(input.numel())); + } +}; + +template struct RowwiseAdd; +template struct RowwiseAdd; +template struct ColwiseSum; +template struct ColwiseSum; +template struct ColwiseSum; +// template struct ColwiseSum; +// The ColwiseSum failed in debug +// mode, +// and only failed for this case. So reimplemented it. +template <> +void ColwiseSum::operator()( + const paddle::platform::CUDADeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* vector) { + auto in_dims = input.dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ(vector->numel(), + size, + paddle::platform::errors::InvalidArgument( + "The size of input vector" + " should be equal to the size of input tensor column" + " dimension. Expected vector size=%d, but received %d", + size, + vector->numel())); + paddle::framework::Tensor one; + one.mutable_data({in_dims[0]}, context.GetPlace()); + SetConstant set; + set(context, &one, static_cast(1.0)); + paddle::operators::math::GetBlas( + context) + .GEMV(true, + static_cast(in_dims[0]), + static_cast(in_dims[1]), + 1.0, + input.data(), + one.data(), + 0.0, + vector->data()); +} + +template struct RowwiseSum; +// template struct RowwiseSum; +// TODO(zcd): Following ColwiseSum format, need to confirm. +// The RowwiseSum failed in debug +// mode, +// and only failed for this case. So reimplemented it. +template <> +void RowwiseSum::operator()( + const paddle::platform::CUDADeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* vector) { + auto in_dims = input.dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ(vector->numel(), + in_dims[0], + paddle::platform::errors::InvalidArgument( + "The size of input vector" + " should be equal to the size of input tensor row" + " dimension. 
Expected vector size=%d, but received %d", + in_dims[0], + vector->numel())); + paddle::framework::Tensor one; + one.mutable_data({size}, context.GetPlace()); + SetConstant set; + set(context, &one, static_cast(1.0)); + paddle::operators::math::GetBlas( + context) + .GEMV(true, + static_cast(in_dims[1]), + static_cast(in_dims[0]), + 1.0, + one.data(), + input.data(), + 0.0, + vector->data()); +} + +template struct RowwiseMean; +template struct RowwiseMean; + +template +struct ElementwiseAddTo { + void operator()(paddle::platform::CUDADeviceContext* ctx, + const paddle::framework::Tensor& src, + paddle::framework::Tensor* dst) { + auto in = paddle::framework::EigenVector::Flatten(src); + auto out = paddle::framework::EigenVector::Flatten(*dst); + auto& place = *(ctx->eigen_device()); + out.device(place) = out + in; + } +}; + +template struct ElementwiseAddTo; + +} // namespace funcs +} // namespace pten diff --git a/paddle/pten/kernels/funcs/math_function.h b/paddle/pten/kernels/funcs/math_function.h new file mode 100644 index 0000000000000..8208c0afb0675 --- /dev/null +++ b/paddle/pten/kernels/funcs/math_function.h @@ -0,0 +1,127 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include + +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/pten/core/dense_tensor.h" + +namespace pten { +namespace funcs { + +template +struct TransposeNormal { + // for dims >= 7 situation + void operator()(const DeviceContext& context, + const paddle::framework::Tensor& in, + paddle::framework::Tensor* out, + const std::vector& axis); +}; + +template +struct Transpose { + void operator()(const DeviceContext& context, + const paddle::framework::Tensor& in, + paddle::framework::Tensor* out, + const std::vector& axis); +}; + +template +struct SetConstant { + void operator()(const DeviceContext& context, + paddle::framework::Tensor* tensor, + T num); +}; + +template +void set_constant_with_place(const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value); + +void set_constant(const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value); + +template +struct RowwiseAdd { + void operator()(const DeviceContext& context, + const paddle::framework::Tensor& input, + const paddle::framework::Tensor& vec, + paddle::framework::Tensor* output); +}; + +template +struct ElementwiseAddTo { + // dst = dst + src + void operator()(DeviceContext* ctx, + const paddle::framework::Tensor& src, + paddle::framework::Tensor* dst); +}; + +template +struct ColwiseSum { + void operator()(const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* vec); +}; + +template +struct RowwiseSum { + void 
operator()(const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* vec); +}; + +template +struct RowwiseMean { + void operator()(const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* vec); +}; + +#ifdef PADDLE_WITH_XPU +template +struct TensorSetConstantXPU { + TensorSetConstantXPU(paddle::framework::Tensor* tensor, + U value, + paddle::platform::Place place) + : tensor_(tensor), value_(value), place_(place) {} + template + void apply() const { + auto* begin = tensor_->mutable_data(place_); + int numel = tensor_->numel(); + std::unique_ptr data_cpu(new T[numel]); + std::fill(data_cpu.get(), data_cpu.get() + numel, static_cast(value_)); + paddle::memory::Copy(place_, + begin, + paddle::platform::CPUPlace(), + static_cast(data_cpu.get()), + numel * sizeof(T)); + } + paddle::framework::Tensor* tensor_; + U value_; + paddle::platform::Place place_; +}; +#endif + +} // namespace funcs +} // namespace pten diff --git a/paddle/fluid/operators/math/math_function_impl.h b/paddle/pten/kernels/funcs/math_function_impl.h similarity index 54% rename from paddle/fluid/operators/math/math_function_impl.h rename to paddle/pten/kernels/funcs/math_function_impl.h index 0e44f90304330..286f694ce51a9 100644 --- a/paddle/fluid/operators/math/math_function_impl.h +++ b/paddle/pten/kernels/funcs/math_function_impl.h @@ -16,47 +16,47 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" -namespace paddle { -namespace operators { -namespace math { +namespace pten { +namespace funcs { -using framework::To32BitIndex; +using paddle::framework::To32BitIndex; template -void SetConstant::operator()(const DeviceContext& context, - framework::Tensor* tensor, - T num) { +void SetConstant::operator()( + const DeviceContext& context, paddle::framework::Tensor* tensor, T num) { bool xpu_place = false; #ifdef PADDLE_WITH_XPU - if (platform::is_xpu_place(context.GetPlace())) { + if (paddle::platform::is_xpu_place(context.GetPlace())) { xpu_place = true; - framework::VisitDataType( + paddle::framework::VisitDataType( tensor->type(), TensorSetConstantXPU(tensor, num, context.GetPlace())); } #endif if (!xpu_place) { - auto t = framework::EigenVector::Flatten(*tensor); + auto t = paddle::framework::EigenVector::Flatten(*tensor); t.device(*context.eigen_device()) = t.constant(static_cast(num)); } } template void Transpose::operator()( - const DeviceContext& context, const framework::Tensor& in, - framework::Tensor* out, const std::vector& axis) { + const DeviceContext& context, + const paddle::framework::Tensor& in, + paddle::framework::Tensor* out, + const std::vector& axis) { Eigen::array permute; for (int i = 0; i < Rank; i++) { permute[i] = axis[i]; } - auto eigen_in = framework::EigenTensor::From(in); - auto eigen_out = framework::EigenTensor::From(*out); + auto eigen_in = paddle::framework::EigenTensor::From(in); + auto eigen_out = paddle::framework::EigenTensor::From(*out); auto* dev = context.eigen_device(); // use 32bit index to speed up computation bool use_32bit_index = eigen_out.size() < Eigen::NumTraits::highest(); - bool is_gpu_place = platform::is_gpu_place(context.GetPlace()); + bool is_gpu_place = paddle::platform::is_gpu_place(context.GetPlace()); if (use_32bit_index && is_gpu_place) { To32BitIndex(eigen_out).device(*dev) = To32BitIndex(eigen_in).shuffle(permute); @@ 
-66,20 +66,23 @@ void Transpose::operator()( } template -void ColwiseSum::operator()(const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* out) { +void ColwiseSum::operator()( + const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* out) { auto in_dims = input.dims(); auto size = input.numel() / in_dims[0]; - PADDLE_ENFORCE_EQ(out->numel(), size, - platform::errors::InvalidArgument( + PADDLE_ENFORCE_EQ(out->numel(), + size, + paddle::platform::errors::InvalidArgument( "The size of output tensor " "should be equal to the size of input tensor column" " dimension. Expected output size=%d, but received %d", - size, out->numel())); + size, + out->numel())); - auto in = framework::EigenMatrix::From(input); - auto vec = framework::EigenVector::Flatten(*out); + auto in = paddle::framework::EigenMatrix::From(input); + auto vec = paddle::framework::EigenVector::Flatten(*out); vec.device(*context.eigen_device()) = in.sum(Eigen::array({{0}})); } @@ -88,20 +91,23 @@ void ColwiseSum::operator()(const DeviceContext& context, // colwise-sum can be easily implemented. General reduce has a huge overhead in // CPU template -class ColwiseSum { +class ColwiseSum { public: - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& input, framework::Tensor* out) { + void operator()(const paddle::platform::CPUDeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* out) { auto& in_dims = input.dims(); auto height = in_dims[0]; auto size = in_dims[1]; PADDLE_ENFORCE_EQ( - out->numel(), size, - platform::errors::InvalidArgument( + out->numel(), + size, + paddle::platform::errors::InvalidArgument( "The size of output tensor " "should be equal to the size of input tensor column" " dimension. Expected output size=%d, but received %d", - size, out->numel())); + size, + out->numel())); T* out_buf = out->mutable_data(out->place()); const T* in_buf = input.data(); @@ -119,23 +125,28 @@ class ColwiseSum { }; template -void RowwiseMean::operator()(const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* out) { +void RowwiseMean::operator()( + const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* out) { auto in_dims = input.dims(); - PADDLE_ENFORCE_EQ(in_dims.size(), 2U, platform::errors::InvalidArgument( - "The rank of input tensor " - "should be 2, but received %d", - in_dims.size())); - PADDLE_ENFORCE_EQ(out->numel(), in_dims[0], - platform::errors::InvalidArgument( + PADDLE_ENFORCE_EQ( + in_dims.size(), + 2U, + paddle::platform::errors::InvalidArgument("The rank of input tensor " + "should be 2, but received %d", + in_dims.size())); + PADDLE_ENFORCE_EQ(out->numel(), + in_dims[0], + paddle::platform::errors::InvalidArgument( "The size of output tensor " "should be equal to the size of input tensor row" " dimension. Expected output size=%d, but received %d", - in_dims[0], out->numel())); + in_dims[0], + out->numel())); - auto in = framework::EigenMatrix::From(input); - auto vec = framework::EigenVector::Flatten(*out); + auto in = paddle::framework::EigenMatrix::From(input); + auto vec = paddle::framework::EigenVector::Flatten(*out); vec.device(*context.eigen_device()) = in.mean(Eigen::array({{1}})); } @@ -144,24 +155,29 @@ void RowwiseMean::operator()(const DeviceContext& context, // rowwise-sum can be easily implemented. 
General reduce has a huge overhead in // CPU template -class RowwiseMean { +class RowwiseMean { public: - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& input, framework::Tensor* out) { + void operator()(const paddle::platform::CPUDeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* out) { auto& in_dims = input.dims(); - PADDLE_ENFORCE_EQ(in_dims.size(), 2U, platform::errors::InvalidArgument( - "The rank of input tensor " - "should be 2, but received %d", - in_dims.size())); + PADDLE_ENFORCE_EQ(in_dims.size(), + 2U, + paddle::platform::errors::InvalidArgument( + "The rank of input tensor " + "should be 2, but received %d", + in_dims.size())); auto height = in_dims[0]; auto size = in_dims[1]; PADDLE_ENFORCE_EQ( - out->numel(), height, - platform::errors::InvalidArgument( + out->numel(), + height, + paddle::platform::errors::InvalidArgument( "The size of output tensor " "should be equal to the size of input tensor row" " dimension. Expected output size=%d, but received %d", - height, out->numel())); + height, + out->numel())); auto inv_size = 1.0 / size; T* out_buf = out->mutable_data(out->place()); const T* in_buf = input.data(); @@ -177,23 +193,28 @@ class RowwiseMean { }; template -void RowwiseSum::operator()(const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* out) { +void RowwiseSum::operator()( + const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* out) { auto in_dims = input.dims(); - PADDLE_ENFORCE_EQ(in_dims.size(), 2U, platform::errors::InvalidArgument( - "The rank of input tensor " - "should be 2, but received %d", - in_dims.size())); - PADDLE_ENFORCE_EQ(out->numel(), in_dims[0], - platform::errors::InvalidArgument( + PADDLE_ENFORCE_EQ( + in_dims.size(), + 2U, + paddle::platform::errors::InvalidArgument("The rank of input tensor " + "should be 2, but received %d", + in_dims.size())); + PADDLE_ENFORCE_EQ(out->numel(), + in_dims[0], + paddle::platform::errors::InvalidArgument( "The size of output tensor " "should be equal to the size of input tensor row" " dimension. Expected output size=%d, but received %d", - in_dims[0], out->numel())); + in_dims[0], + out->numel())); - auto in = framework::EigenMatrix::From(input); - auto vec = framework::EigenVector::Flatten(*out); + auto in = paddle::framework::EigenMatrix::From(input); + auto vec = paddle::framework::EigenVector::Flatten(*out); vec.device(*context.eigen_device()) = in.sum(Eigen::array({{1}})); } @@ -202,24 +223,29 @@ void RowwiseSum::operator()(const DeviceContext& context, // rowwise-sum can be easily implemented. 
General reduce has a huge overhead in
 // CPU
 template <typename T>
-class RowwiseSum<platform::CPUDeviceContext, T> {
+class RowwiseSum<paddle::platform::CPUDeviceContext, T> {
  public:
-  void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& input, framework::Tensor* out) {
+  void operator()(const paddle::platform::CPUDeviceContext& context,
+                  const paddle::framework::Tensor& input,
+                  paddle::framework::Tensor* out) {
     auto& in_dims = input.dims();
-    PADDLE_ENFORCE_EQ(in_dims.size(), 2U, platform::errors::InvalidArgument(
-                                              "The rank of input tensor "
-                                              "should be 2, but received %d",
-                                              in_dims.size()));
+    PADDLE_ENFORCE_EQ(in_dims.size(),
+                      2U,
+                      paddle::platform::errors::InvalidArgument(
+                          "The rank of input tensor "
+                          "should be 2, but received %d",
+                          in_dims.size()));
     auto height = in_dims[0];
     auto size = in_dims[1];
     PADDLE_ENFORCE_EQ(
-        out->numel(), height,
-        platform::errors::InvalidArgument(
+        out->numel(),
+        height,
+        paddle::platform::errors::InvalidArgument(
             "The size of output tensor "
             "should be equal to the size of input tensor row"
             " dimension. Expected output size=%d, but received %d",
-            height, out->numel()));
+            height,
+            out->numel()));
     T* out_buf = out->mutable_data<T>(out->place());
     const T* in_buf = input.data<T>();
@@ -234,6 +260,5 @@ class RowwiseSum<platform::CPUDeviceContext, T> {
   }
 };
 
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace pten
diff --git a/paddle/fluid/operators/math/math_function_test.cc b/paddle/pten/kernels/funcs/math_function_test.cc
similarity index 69%
rename from paddle/fluid/operators/math/math_function_test.cc
rename to paddle/pten/kernels/funcs/math_function_test.cc
index 91a4f2746ea57..6ef8c6b689d2c 100644
--- a/paddle/fluid/operators/math/math_function_test.cc
+++ b/paddle/pten/kernels/funcs/math_function_test.cc
@@ -11,7 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
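The test churn below is purely mechanical (header path, namespace, and clang-format argument wrapping), so it is worth pausing on what the rename means for calling code. A minimal sketch, assuming a CPU-only build; FillWithOnes is an illustrative name, not a function in this patch. It mirrors the TEST(math_function, zero) case further down: only the namespace and the include path change, the functor's shape does not.

// Fill a small host tensor with a constant via the relocated functor.
// Call sites previously spelled paddle::operators::math::SetConstant.
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/pten/kernels/funcs/math_function.h"

void FillWithOnes() {
  paddle::framework::Tensor t;
  paddle::platform::CPUPlace cpu;
  t.mutable_data<float>({2, 3}, cpu);  // allocate a 2x3 float tensor on the host

  paddle::platform::CPUDeviceContext ctx(cpu);
  pten::funcs::SetConstant<paddle::platform::CPUDeviceContext, float> functor;
  functor(ctx, &t, 1.0f);  // every element of t is now 1.0f
}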
-#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "gtest/gtest.h" #include "paddle/fluid/operators/math/blas.h" @@ -42,8 +42,19 @@ TEST(math_function, gemm_notrans_cblas) { memcpy(input3_ptr, arr3, 8 * sizeof(float)); paddle::platform::CPUDeviceContext context(*cpu_place); - GetBlas(context).GEMM(false, false, m, n, k, 1, input1_ptr, 3, - input2_ptr + 1, 4, 1, input3_ptr + 1, 4); + GetBlas(context).GEMM(false, + false, + m, + n, + k, + 1, + input1_ptr, + 3, + input2_ptr + 1, + 4, + 1, + input3_ptr + 1, + 4); EXPECT_EQ(input3_ptr[0], 0); EXPECT_EQ(input3_ptr[1], 24); @@ -83,15 +94,36 @@ void MklSmmCompare(int m, int n, int k) { auto smm = [&, m, n, k, lda, ldb, ldc, alpha, beta]() { const char transa = 'N'; const char transb = 'N'; - paddle::operators::math::CBlas::SMM_GEMM(&transa, &transb, &n, &m, &k, - &alpha, B, &ldb, A, &lda, &beta, - CSMM, &ldc); + paddle::operators::math::CBlas::SMM_GEMM(&transa, + &transb, + &n, + &m, + &k, + &alpha, + B, + &ldb, + A, + &lda, + &beta, + CSMM, + &ldc); }; auto mkl = [&, m, n, k, lda, ldb, ldc, alpha, beta]() { - paddle::operators::math::CBlas::GEMM(CblasRowMajor, CblasNoTrans, - CblasNoTrans, m, n, k, alpha, A, - lda, B, ldb, beta, CMKL, ldc); + paddle::operators::math::CBlas::GEMM(CblasRowMajor, + CblasNoTrans, + CblasNoTrans, + m, + n, + k, + alpha, + A, + lda, + B, + ldb, + beta, + CMKL, + ldc); }; smm(); @@ -131,8 +163,19 @@ TEST(math_function, gemm_trans_cblas) { memcpy(input3_ptr, arr3, 8 * sizeof(float)); paddle::platform::CPUDeviceContext context(*cpu_place); - GetBlas(context).GEMM(false, true, m, n, k, 1, input1_ptr, 3, - input2_ptr + 3, 3, 1, input3_ptr + 1, 4); + GetBlas(context).GEMM(false, + true, + m, + n, + k, + 1, + input1_ptr, + 3, + input2_ptr + 3, + 3, + 1, + input3_ptr + 1, + 4); delete cpu_place; cpu_place = NULL; @@ -151,9 +194,7 @@ TEST(math_function, zero) { auto* cpu_place = new paddle::platform::CPUPlace(); float* t = tensor.mutable_data({2, 2}, *cpu_place); paddle::platform::CPUDeviceContext context(*cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; functor(context, &tensor, 0); EXPECT_EQ(t[0], 0); EXPECT_EQ(t[1], 0); @@ -188,8 +229,14 @@ void GemvTest(int m, int n, bool trans) { } paddle::platform::CPUDeviceContext context(*cpu_place); - GetBlas(context).GEMV(trans, static_cast(m), static_cast(n), 1., - data_a, data_b, 0., data_c); + GetBlas(context).GEMV(trans, + static_cast(m), + static_cast(n), + 1., + data_a, + data_b, + 0., + data_c); if (!trans) { for (int i = 0; i < m; ++i) { @@ -224,9 +271,10 @@ TEST(math_funciton, set_constant) { t.mutable_data(paddle::platform::CPUPlace()); auto* ctx = new paddle::platform::CPUDeviceContext(); ctx->Init(); - paddle::operators::math::set_constant(*ctx, &t, 10); + pten::funcs::set_constant(*ctx, &t, 10); for (int64_t i = 0; i < t.numel(); ++i) { - PADDLE_ENFORCE_EQ(10, t.data()[i], + PADDLE_ENFORCE_EQ(10, + t.data()[i], paddle::platform::errors::InvalidArgument( "Each value of input tensor should be 10, " "but received %d.", @@ -262,16 +310,27 @@ void GemmWarpTest(int m, int n, int k, T alpha, T beta) { // this would call gemm_warp paddle::platform::CPUDeviceContext context(*cpu_place); - GetBlas(context).GEMM(CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, B, - beta, CREF); + GetBlas(context).GEMM( + CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, B, beta, CREF); // lda,ldb,ldc follow RowMajor int lda = k; int ldb = n; int ldc = n; - 
paddle::operators::math::CBlas::GEMM(CblasRowMajor, CblasNoTrans, - CblasNoTrans, m, n, k, alpha, A, lda, - B, ldb, beta, CMKL, ldc); + paddle::operators::math::CBlas::GEMM(CblasRowMajor, + CblasNoTrans, + CblasNoTrans, + m, + n, + k, + alpha, + A, + lda, + B, + ldb, + beta, + CMKL, + ldc); for (int i = 0; i < mat_c_mkl.numel(); ++i) { EXPECT_FLOAT_EQ(CREF[i], CMKL[i]); diff --git a/paddle/fluid/operators/math/math_function_test.cu b/paddle/pten/kernels/funcs/math_function_test.cu similarity index 90% rename from paddle/fluid/operators/math/math_function_test.cu rename to paddle/pten/kernels/funcs/math_function_test.cu index 39c91e96a70fa..87f11c47a4433 100644 --- a/paddle/fluid/operators/math/math_function_test.cu +++ b/paddle/pten/kernels/funcs/math_function_test.cu @@ -13,17 +13,20 @@ // limitations under the License. #include "gtest/gtest.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/pten/kernels/funcs/math_function.h" -void fill_fp16_data(paddle::platform::float16* in_ptr, size_t size, +void fill_fp16_data(paddle::platform::float16* in_ptr, + size_t size, const std::vector& data) { PADDLE_ENFORCE_EQ( - size, data.size(), + size, + data.size(), paddle::platform::errors::InvalidArgument( "The size of argument data should" " be equal to the argument size. Expected %d, but received %d.", - size, data.size())); + size, + data.size())); for (size_t i = 0; i < data.size(); ++i) { in_ptr[i] = paddle::platform::float16(data[i]); } @@ -59,8 +62,8 @@ TEST(math_function, notrans_mul_trans_fp32) { paddle::framework::TensorCopySync(input1, gpu_place, &input2_gpu); out_gpu.mutable_data({2, 2}, gpu_place); - GetBlas(context).MatMul(input1_gpu, false, input2_gpu, true, 1, - &out_gpu, 0); + GetBlas(context).MatMul( + input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0); paddle::framework::TensorCopySync(out_gpu, cpu_place, &out); @@ -102,8 +105,13 @@ TEST(math_function, notrans_mul_trans_fp16) { out_gpu.mutable_data({2, 2}, gpu_place); GetBlas(context).MatMul( - input1_gpu, false, input2_gpu, true, paddle::platform::float16(1), - &out_gpu, paddle::platform::float16(0)); + input1_gpu, + false, + input2_gpu, + true, + paddle::platform::float16(1), + &out_gpu, + paddle::platform::float16(0)); paddle::framework::TensorCopySync(out_gpu, cpu_place, &out); @@ -139,8 +147,8 @@ TEST(math_function, trans_mul_notrans_fp32) { out_gpu.mutable_data({3, 3}, gpu_place); - GetBlas(context).MatMul(input1_gpu, true, input2_gpu, false, 1, - &out_gpu, 0); + GetBlas(context).MatMul( + input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0); paddle::framework::TensorCopySync(out_gpu, cpu_place, &out); @@ -187,8 +195,13 @@ TEST(math_function, trans_mul_notrans_fp16) { out_gpu.mutable_data({3, 3}, gpu_place); GetBlas(context).MatMul( - input1_gpu, true, input2_gpu, false, paddle::platform::float16(1), - &out_gpu, paddle::platform::float16(0)); + input1_gpu, + true, + input2_gpu, + false, + paddle::platform::float16(1), + &out_gpu, + paddle::platform::float16(0)); paddle::framework::TensorCopySync(out_gpu, cpu_place, &out); @@ -241,8 +254,8 @@ TEST(math_function, gemm_notrans_cublas_fp32) { float* b = input2_gpu.data(); float* c = input3_gpu.mutable_data(gpu_place); - GetBlas(context).GEMM(false, false, m, n, k, 1, a, 3, b + 1, 4, 1, - c + 1, 4); + GetBlas(context).GEMM( + false, false, m, n, k, 1, a, 3, b + 1, 4, 1, c + 1, 4); paddle::framework::TensorCopySync(input3_gpu, cpu_place, &input3); @@ -292,8 
+305,8 @@ TEST(math_function, gemm_notrans_cublas_fp16) { fill_fp16_data(input1_ptr, input1.numel(), {0, 1, 2, 3, 4, 5}); paddle::platform::float16* input2_ptr = input2.mutable_data({3, 4}, cpu_place); - fill_fp16_data(input2_ptr, input2.numel(), - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + fill_fp16_data( + input2_ptr, input2.numel(), {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); paddle::platform::float16* input3_ptr = input3.mutable_data({2, 4}, cpu_place); fill_fp16_data(input3_ptr, input3.numel(), {0, 1, 2, 3, 4, 5, 6, 7}); @@ -307,8 +320,19 @@ TEST(math_function, gemm_notrans_cublas_fp16) { input3_gpu.mutable_data(gpu_place); GetBlas(context).GEMM( - false, false, m, n, k, static_cast(1), a, 3, - b + 1, 4, static_cast(1), c + 1, 4); + false, + false, + m, + n, + k, + static_cast(1), + a, + 3, + b + 1, + 4, + static_cast(1), + c + 1, + 4); paddle::framework::TensorCopySync(input3_gpu, cpu_place, &input3); @@ -365,8 +389,8 @@ TEST(math_function, gemm_trans_cublas_fp32) { float* b = input2_gpu.data(); float* c = input3_gpu.mutable_data(gpu_place); - GetBlas(context).GEMM(false, true, m, n, k, 1, a, 3, b + 3, 3, 1, - c + 1, 4); + GetBlas(context).GEMM( + false, true, m, n, k, 1, a, 3, b + 3, 3, 1, c + 1, 4); paddle::framework::TensorCopySync(input3_gpu, cpu_place, &input3); @@ -410,8 +434,8 @@ TEST(math_function, gemm_trans_cublas_fp16) { fill_fp16_data(input1_ptr, input1.numel(), {0, 1, 2, 3, 4, 5}); paddle::platform::float16* input2_ptr = input2.mutable_data({4, 3}, cpu_place); - fill_fp16_data(input2_ptr, input2.numel(), - {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11}); + fill_fp16_data( + input2_ptr, input2.numel(), {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11}); paddle::platform::float16* input3_ptr = input3.mutable_data({2, 4}, cpu_place); fill_fp16_data(input3_ptr, input3.numel(), {0, 1, 2, 3, 4, 5, 6, 7}); @@ -425,8 +449,19 @@ TEST(math_function, gemm_trans_cublas_fp16) { input3_gpu.mutable_data(gpu_place); GetBlas(context).GEMM( - false, true, m, n, k, static_cast(1), a, 3, - b + 3, 3, static_cast(1), c + 1, 4); + false, + true, + m, + n, + k, + static_cast(1), + a, + 3, + b + 3, + 3, + static_cast(1), + c + 1, + 4); paddle::framework::TensorCopySync(input3_gpu, cpu_place, &input3); @@ -476,8 +511,14 @@ void GemvTest(int m, int n, bool trans) { paddle::framework::TensorCopySync(mat_a, gpu_place, &g_mat_a); paddle::framework::TensorCopySync(vec_b, gpu_place, &g_vec_b); - GetBlas(context).GEMV(trans, static_cast(m), static_cast(n), 1., - g_data_a, g_data_b, 0., g_data_c); + GetBlas(context).GEMV(trans, + static_cast(m), + static_cast(n), + 1., + g_data_a, + g_data_b, + 0., + g_data_c); paddle::framework::TensorCopySync(g_vec_c, cpu_place, &vec_c); diff --git a/paddle/pten/kernels/gpu/trace_kernel.cu b/paddle/pten/kernels/gpu/trace_kernel.cu index 155bfbd02af17..f552386fafdc7 100644 --- a/paddle/pten/kernels/gpu/trace_kernel.cu +++ b/paddle/pten/kernels/gpu/trace_kernel.cu @@ -36,7 +36,7 @@ void TraceKernel(const Context& ctx, kernels::TensorReduceImpl>( ctx, diag, out, kps::IdentityFunctor(), reduce_dims, stream); } else { - paddle::operators::math::SetConstant functor; + pten::funcs::SetConstant functor; functor(ctx, out, static_cast(0)); } } diff --git a/paddle/pten/kernels/impl/trace_kernel_impl.h b/paddle/pten/kernels/impl/trace_kernel_impl.h index 4dbba9bc69e61..1b499681bbbe4 100644 --- a/paddle/pten/kernels/impl/trace_kernel_impl.h +++ b/paddle/pten/kernels/impl/trace_kernel_impl.h @@ -22,8 +22,9 @@ #include #include "paddle/fluid/framework/eigen.h" -#include 
"paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace pten { template @@ -196,7 +197,7 @@ void TraceGradKernel(const Context& ctx, auto* out_data = out_grad.data(); T* x_data = in_grad->mutable_data(ctx.GetPlace()); - paddle::operators::math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(ctx, in_grad, static_cast(0.0)); auto dim1 = axis1;