【Hackathon No.32】Optimize the GPU performance of the expand_as forward & backward ops for Paddle #52700

Merged · 5 commits · Apr 17, 2023
37 changes: 36 additions & 1 deletion paddle/phi/kernels/gpu/expand_as_grad_kernel.cu
@@ -15,8 +15,43 @@
#include "paddle/phi/kernels/expand_as_grad_kernel.h"

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"

namespace phi {

template <typename T, typename Context>
void ExpandAsGradKernel(const Context& context,
                        const DenseTensor& x,
                        const DenseTensor& out_grad,
                        const std::vector<int>& target_shape,
                        DenseTensor* in_grad) {
  auto in_dims = x.dims();
  auto out_dims = out_grad.dims();
  int in_rank = in_dims.size();
  int out_rank = out_dims.size();
  PADDLE_ENFORCE_LE(
      out_rank,
      6,
      errors::InvalidArgument("The rank of the input 'Out@GRAD' for "
                              "expand_as_v2_grad op must be less than or equal "
                              "to 6, but the value received is %d.",
                              out_rank));

Review comment (Contributor): This doesn't read very smoothly.

Reply (Contributor, Author): Which part is awkward? This is taken directly from expand_grad_kernel_impl.h, L94-L101.

  context.template Alloc<T>(in_grad);
  if (in_dims == out_dims) {
    phi::Copy(context, out_grad, context.GetPlace(), false, in_grad);
  } else {
    std::vector<int> reduce_dims = funcs::GetReduceDim(in_dims, out_dims, -1);

    funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
        context, out_grad, in_grad, kps::IdentityFunctor<T>(), reduce_dims);
  }
}

} // namespace phi

PD_REGISTER_KERNEL(expand_as_grad,
                   GPU,
                   ...
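Aside on the backward pass: expanding x to out broadcasts some axes, so the gradient must sum out_grad over exactly those axes to recover x's shape; that is what funcs::GetReduceDim plus funcs::ReduceKernel with kps::AddFunctor accomplish above. Below is a minimal, self-contained sketch of how the reduce axes can be derived; BroadcastReduceAxes is a hypothetical illustration, not Paddle's actual funcs::GetReduceDim.

#include <cassert>
#include <iostream>
#include <vector>

// Hypothetical helper (not a Paddle API): given the input and output shapes
// of an expand_as, return the axes of `out` that must be sum-reduced to
// recover a tensor with the input's shape. An axis was broadcast if it
// exists only in `out`, or if the input had extent 1 there but `out` does not.
std::vector<int> BroadcastReduceAxes(const std::vector<int>& in_dims,
                                     const std::vector<int>& out_dims) {
  assert(in_dims.size() <= out_dims.size());
  const int diff = static_cast<int>(out_dims.size() - in_dims.size());
  std::vector<int> axes;
  for (int i = 0; i < static_cast<int>(out_dims.size()); ++i) {
    if (i < diff) {
      axes.push_back(i);  // axis prepended to the input's shape
    } else if (in_dims[i - diff] == 1 && out_dims[i] != 1) {
      axes.push_back(i);  // size-1 axis that was expanded
    }
  }
  return axes;
}

int main() {
  // x: [1, 3] expanded to out: [2, 4, 1, 3] -> gradient reduces axes 0 and 1.
  for (int axis : BroadcastReduceAxes({1, 3}, {2, 4, 1, 3})) {
    std::cout << axis << ' ';
  }
  std::cout << '\n';  // prints: 0 1
  return 0;
}

Summing out_grad over these axes with kps::AddFunctor collapses it back to x's shape, which is why the kernel can skip the reduction entirely when in_dims == out_dims and just copy.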
64 changes: 63 additions & 1 deletion paddle/phi/kernels/gpu/expand_as_kernel.cu
@@ -15,8 +15,70 @@
#include "paddle/phi/kernels/expand_as_kernel.h"

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/expand_as_kernel_impl.h"
#include "paddle/phi/kernels/funcs/broadcast_function.h"

namespace phi {

template <typename T, typename Context>
void ExpandAsKernel(const Context& ctx,
                    const DenseTensor& x,
                    const paddle::optional<DenseTensor>& y,
                    const std::vector<int>& target_shape,
                    DenseTensor* out) {
  int rank = x.dims().size();
  int target_rank = static_cast<int>(target_shape.size());
  auto vec_in_dims = phi::vectorize<int>(x.dims());

  unsigned int diff = target_rank - rank;
  vec_in_dims.insert(vec_in_dims.begin(), diff, 1);

  for (unsigned int i = 0; i < vec_in_dims.size(); ++i) {
    PADDLE_ENFORCE_NE(
        target_shape[i],
        0,
        errors::InvalidArgument("The value of target shape cannot be zero."));
    if (i < diff) {
      PADDLE_ENFORCE_GT(
          target_shape[i],
          0,
          errors::InvalidArgument(
              "The expanded size (%d) for non-existing dimensions must be "
              "positive for expand_as_v2 op.",
              target_shape[i]));
    } else if (target_shape[i] > 0) {
      if (vec_in_dims[i] != 1) {
        PADDLE_ENFORCE_EQ(
            vec_in_dims[i],
            target_shape[i],
            errors::InvalidArgument(
                "The value (%d) of the non-singleton dimension does not match"
                " the corresponding value (%d) in shape for expand_as_v2 op.",
                vec_in_dims[i],
                target_shape[i]));
      }
    } else {
      PADDLE_ENFORCE_EQ(
          target_shape[i],
          -1,
          errors::InvalidArgument(
              "When the value in shape is negative for expand_as_v2 op, "
              "only -1 is supported, but the value received is %d.",
              target_shape[i]));
    }
  }

  out->Resize(phi::make_ddim(target_shape));
  ctx.template Alloc<T>(out);
  std::vector<const DenseTensor*> ins = {&x};
  std::vector<DenseTensor*> outs = {out};
  phi::funcs::BroadcastKernel<ElementwiseType::kUnary, T, T>(
      ctx, ins, &outs, -1, kps::IdentityFunctor<T>());
}

} // namespace phi

PD_REGISTER_KERNEL(expand_as,
                   GPU,
                   ...
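The validation loop in ExpandAsKernel encodes three rules: a leading axis that exists only in the target shape must be given a positive extent; where x is non-singleton, the target extent must match exactly; and the only negative entry allowed is -1, which by the op's documented semantics keeps the input's extent. The sketch below restates that resolution logic as a standalone function; ResolveExpandAsShape is a hypothetical name for illustration, not a Paddle API, and the -1 handling is an assumption based on the op's semantics rather than this kernel's code.

#include <cassert>
#include <stdexcept>
#include <vector>

// Hypothetical illustration of the checks in ExpandAsKernel: resolve
// `target_shape` against the input's dims, throwing on the same conditions
// the PADDLE_ENFORCE_* macros guard against.
std::vector<int> ResolveExpandAsShape(std::vector<int> in_dims,
                                      const std::vector<int>& target_shape) {
  const size_t diff = target_shape.size() - in_dims.size();
  // Pad the input's shape with leading 1s, as the kernel does.
  in_dims.insert(in_dims.begin(), diff, 1);

  std::vector<int> out_dims(target_shape.size());
  for (size_t i = 0; i < target_shape.size(); ++i) {
    if (target_shape[i] == 0) {
      throw std::invalid_argument("target shape cannot contain zero");
    }
    if (i < diff) {
      // Axis exists only in the target: its extent must be given explicitly.
      if (target_shape[i] < 0) {
        throw std::invalid_argument("new leading axes need a positive size");
      }
      out_dims[i] = target_shape[i];
    } else if (target_shape[i] > 0) {
      // Non-singleton input axes must match the target exactly.
      if (in_dims[i] != 1 && in_dims[i] != target_shape[i]) {
        throw std::invalid_argument("non-singleton dimension mismatch");
      }
      out_dims[i] = target_shape[i];
    } else if (target_shape[i] == -1) {
      out_dims[i] = in_dims[i];  // assumed: -1 keeps the input's extent
    } else {
      throw std::invalid_argument("only -1 is allowed as a negative entry");
    }
  }
  return out_dims;
}

int main() {
  // Input dims {3, 1} against target shape {2, 3, -1} resolve to {2, 3, 1}.
  assert((ResolveExpandAsShape({3, 1}, {2, 3, -1}) ==
          std::vector<int>{2, 3, 1}));
  return 0;
}

Once the output shape is fixed, the forward pass itself is a single phi::funcs::BroadcastKernel launch with kps::IdentityFunctor, i.e., a pure broadcast copy with no arithmetic per element.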