Skip to content

Commit

Permalink
Compilation optimization for reduce (PaddlePaddle#57082)
Browse files: browse the repository at this point in the history
  • Loading branch information
tianhaodongbd authored and Frida-a committed Oct 14, 2023
1 parent 8622865 commit daf644f
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 21 deletions.
13 changes: 7 additions & 6 deletions paddle/phi/kernels/gpu/broadcast_tensors_grad_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/primitive/functor_primitives.h"
#include "paddle/phi/kernels/reduce_sum_kernel.h"

namespace phi {

Expand Down Expand Up @@ -88,12 +89,12 @@ void BroadcastTensorsGradKernel(const Context& ctx,
phi::Copy(ctx, *input_tensor, ctx.GetPlace(), false, output_tensor);
} else {
// reduce_sum implementation on CUDA
funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
ctx,
*input_tensor,
output_tensor,
kps::IdentityFunctor<T>(),
reduce_dims_vec);
phi::SumKernel<T, Context>(ctx,
*input_tensor,
reduce_dims_vec,
output_tensor->dtype(),
false,
output_tensor);
}
}
}
Expand Down
17 changes: 9 additions & 8 deletions paddle/phi/kernels/gpu/elementwise_grad.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/broadcast_function.h"
#include "paddle/phi/kernels/funcs/elementwise_grad_base.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/reduce_sum_kernel.h"

namespace phi {

Expand All @@ -31,8 +32,8 @@ void ReduceWrapper(const GPUContext &dev_ctx,
DenseTensor *dst) {
std::vector<int> reduce_dims =
funcs::GetReduceDim(dst->dims(), src->dims(), axis);
funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
dev_ctx, *src, dst, kps::IdentityFunctor<T>(), reduce_dims);
phi::SumKernel<T, GPUContext>(
dev_ctx, *src, reduce_dims, src->dtype(), false, dst);
}

template <typename T, typename Functor>
Expand Down Expand Up @@ -169,8 +170,8 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
}
std::vector<int> reduce_dims =
funcs::GetReduceDim(x.dims(), out.dims(), axis);
funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims);
phi::SumKernel<T, GPUContext>(
ctx, dout, reduce_dims, dout.dtype(), false, dx);
}
}
// dy
Expand All @@ -183,8 +184,8 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
} else {
std::vector<int> reduce_dims =
funcs::GetReduceDim(y.dims(), out.dims(), axis);
funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
ctx, dout, dy, kps::IdentityFunctor<T>(), reduce_dims);
phi::SumKernel<T, GPUContext>(
ctx, dout, reduce_dims, dout.dtype(), false, dy);
}
}
}
Expand Down Expand Up @@ -280,8 +281,8 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
}
std::vector<int> reduce_dims =
funcs::GetReduceDim(x.dims(), out.dims(), axis);
funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims);
phi::SumKernel<T, GPUContext>(
ctx, dout, reduce_dims, dout.dtype(), false, dx);
}
}
// dy
Expand Down
5 changes: 3 additions & 2 deletions paddle/phi/kernels/gpu/expand_as_grad_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/reduce_sum_kernel.h"

namespace phi {

Expand Down Expand Up @@ -46,8 +47,8 @@ void ExpandAsGradKernel(const Context& context,
} else {
std::vector<int> reduce_dims = funcs::GetReduceDim(in_dims, out_dims, -1);

funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
context, out_grad, in_grad, kps::IdentityFunctor<T>(), reduce_dims);
phi::SumKernel<T, Context>(
context, out_grad, reduce_dims, out_grad.dtype(), false, in_grad);
}
}

Expand Down
5 changes: 3 additions & 2 deletions paddle/phi/kernels/gpu/expand_grad_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/reduce_sum_kernel.h"

namespace phi {

Expand All @@ -33,8 +34,8 @@ void ExpandGradKernel(const Context& ctx,
} else {
std::vector<int> reduce_dims =
funcs::GetReduceDim(x_grad->dims(), out_grad.dims(), -1);
funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
ctx, out_grad, x_grad, kps::IdentityFunctor<T>(), reduce_dims);
phi::SumKernel<T, Context>(
ctx, out_grad, reduce_dims, out_grad.dtype(), false, x_grad);
}
}

Expand Down
5 changes: 2 additions & 3 deletions paddle/phi/kernels/kps/reduce_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/gpu/reduce.h"
#include "paddle/phi/kernels/legacy/reduce_max_kernel.h"
#include "paddle/phi/kernels/prod_kernel.h"
#include "paddle/phi/kernels/reduce_all_kernel.h"
#include "paddle/phi/kernels/reduce_amin_kernel.h"
Expand Down Expand Up @@ -102,9 +103,7 @@ void MaxKernel(const Context& dev_ctx,
bool keep_dim,
DenseTensor* out) {
bool reduce_all = recompute_reduce_all(x, dims);
auto out_dtype = x.dtype();
phi::Reduce<T, kps::MaxFunctor, kps::IdentityFunctor>(
dev_ctx, x, reduce_all, dims.GetData(), keep_dim, out_dtype, out);
phi::MaxRawKernel<T, Context>(dev_ctx, x, dims, keep_dim, reduce_all, out);
}

template <typename T, typename Context>
Expand Down

0 comments on commit daf644f

Please sign in to comment.