Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move meshgrid to phi #40994

Merged
merged 35 commits into from
Mar 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
a7a3cd9
move momentum, rmsprop to phi; test=develop
phlrain Mar 10, 2022
8dd812c
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 10, 2022
b8c2003
update
phlrain Mar 10, 2022
56e2416
update
phlrain Mar 11, 2022
89e8b37
update
phlrain Mar 11, 2022
cb49f3b
update
phlrain Mar 11, 2022
223e2ff
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 11, 2022
3ef5aae
udpate; test=develop
phlrain Mar 11, 2022
8a07c03
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 11, 2022
d3b3897
fix xpu npu bugs; test=develop
phlrain Mar 11, 2022
b37733a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 11, 2022
f5c4967
fix npu bug; test=develop
phlrain Mar 12, 2022
d11b050
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 12, 2022
911ef93
fix windows compile error; test=develop
phlrain Mar 13, 2022
5ae18b0
fix windows compile error; test=develop
phlrain Mar 13, 2022
1fe99bd
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 13, 2022
4193107
polish code; test=develop
phlrain Mar 15, 2022
c1c7115
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 15, 2022
be6689f
fix conflict; test=develop
phlrain Mar 15, 2022
6586007
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 15, 2022
a230723
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 18, 2022
8044690
add meshgrid;
phlrain Mar 18, 2022
dc648d7
update
phlrain Mar 18, 2022
8f5f5f1
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 18, 2022
8ac993f
polish code
phlrain Mar 22, 2022
f430a7b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 22, 2022
63f3819
polish code;
phlrain Mar 23, 2022
cd8e2c4
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 23, 2022
f646793
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 24, 2022
95a9766
fix bug
phlrain Mar 24, 2022
77893c3
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 27, 2022
bcbe4fa
format; remove useless code
phlrain Mar 27, 2022
2f7a044
fix npu bug
phlrain Mar 28, 2022
7139feb
fix bug
phlrain Mar 28, 2022
6ab883b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions paddle/fluid/operators/math/selected_rows_functor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,13 +279,58 @@ struct SelectedRowsAddToTensor<platform::CPUDeviceContext, T> {
}
};

// CPU (phi::CPUContext) specialization that accumulates a SelectedRows value
// into a dense tensor in place: for the i-th entry r of input1.rows(), the
// i-th row of input1.value() is added element-wise to row r of input2.
//
// Params:
//   context - CPU context; not used by this implementation.
//   input1  - sparse rows to add. When it holds no rows a warning is logged
//             and input2 is left untouched.
//   input2  - dense tensor that is updated in place.
//
// Raises InvalidArgument (through PADDLE_ENFORCE_EQ) when the height of
// input1 differs from input2's first dimension, or when the per-row element
// counts of the two inputs differ.
template <typename T>
struct SelectedRowsAddToTensor<phi::CPUContext, T> {
  void operator()(const phi::CPUContext& context,
                  const phi::SelectedRows& input1, framework::Tensor* input2) {
    // Nothing to accumulate; returning here also keeps the division by
    // rows().size() below from dividing by zero.
    if (UNLIKELY(input1.rows().size() == 0)) {
      LOG(WARNING) << "input selected rows is empty!";
      return;
    }
    auto in1_height = input1.height();
    auto in2_dims = input2->dims();
    PADDLE_ENFORCE_EQ(
        in1_height, in2_dims[0],
        platform::errors::InvalidArgument(
            "The two inputs height must be equal. "
            "But received first input height = [%d], "
            "second input height = [%d]",
            in1_height, in2_dims[0]));

    auto& in1_value = input1.value();
    auto& in1_rows = input1.rows();

    // Number of elements stored per selected row.
    int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
    PADDLE_ENFORCE_EQ(
        in1_row_numel, input2->numel() / in1_height,
        platform::errors::InvalidArgument(
            "The two inputs width must be equal. "
            "But received first input width = [%d], second input width = [%d]",
            in1_row_numel, input2->numel() / in1_height));

    auto* in1_data = in1_value.data<T>();
    auto* input2_data = input2->data<T>();

    for (size_t i = 0; i < in1_rows.size(); i++) {
      // Row offsets are loop-invariant for the inner loop, so compute the
      // source and destination row pointers once per selected row.
      const T* src_row = in1_data + i * in1_row_numel;
      T* dst_row = input2_data + in1_rows[i] * in1_row_numel;
      for (int64_t j = 0; j < in1_row_numel; j++) {
        dst_row[j] += src_row[j];
      }
    }
  }
};

// Explicit instantiations of SelectedRowsAddToTensor for the
// platform::CPUDeviceContext context type (float, double, int, int64_t and
// platform::bfloat16 element types).
template struct SelectedRowsAddToTensor<platform::CPUDeviceContext, float>;
template struct SelectedRowsAddToTensor<platform::CPUDeviceContext, double>;
template struct SelectedRowsAddToTensor<platform::CPUDeviceContext, int>;
template struct SelectedRowsAddToTensor<platform::CPUDeviceContext, int64_t>;
template struct SelectedRowsAddToTensor<platform::CPUDeviceContext,
platform::bfloat16>;

// The same set of element types instantiated for the phi::CPUContext
// specialization.
template struct SelectedRowsAddToTensor<phi::CPUContext, float>;
template struct SelectedRowsAddToTensor<phi::CPUContext, double>;
template struct SelectedRowsAddToTensor<phi::CPUContext, int>;
template struct SelectedRowsAddToTensor<phi::CPUContext, int64_t>;
template struct SelectedRowsAddToTensor<phi::CPUContext, platform::bfloat16>;
// This is a separate namespace for manipulating SelectedRows typed
// data, such as merging duplicated rows, adding two SelectedRows, etc.
//
Expand Down
107 changes: 107 additions & 0 deletions paddle/fluid/operators/math/selected_rows_functor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,77 @@ struct SelectedRowsAddTensor<platform::CUDADeviceContext, T> {
}
};

// GPU (phi::GPUContext) specialization that computes
//   output = dense(input1) + input2
// The output tensor is first filled with zeros, the selected rows of input1
// are accumulated into it by SelectedRowsAddTensorKernel (one block per
// selected row, 256 threads per block), and finally input2 is added
// element-wise through Eigen on the context's device.
//
// Params:
//   context - GPU context supplying the stream and the Eigen device.
//   input1  - sparse rows to add. When it holds no rows the kernel launch and
//             the width checks that depend on it are skipped, so output ends
//             up equal to input2.
//   input2  - dense addend.
//   output  - dense result tensor; overwritten.
//
// Raises InvalidArgument (through PADDLE_ENFORCE_EQ) when the height of
// input1 differs from the first dimension of input2 or output, or when the
// per-row element counts differ.
template <typename T>
struct SelectedRowsAddTensor<phi::GPUContext, T> {
  void operator()(const phi::GPUContext& context,
                  const phi::SelectedRows& input1,
                  const framework::Tensor& input2, framework::Tensor* output) {
    auto in1_height = input1.height();
    auto in2_dims = input2.dims();
    auto out_dims = output->dims();
    PADDLE_ENFORCE_EQ(
        in1_height, in2_dims[0],
        platform::errors::InvalidArgument(
            "The two inputs height must be equal. "
            "But received first input height = [%d], "
            "second input height = [%d]",
            in1_height, in2_dims[0]));
    PADDLE_ENFORCE_EQ(
        in1_height, out_dims[0],
        platform::errors::InvalidArgument(
            "The input and output height must be equal. "
            "But received input height = [%d], output height = [%d]",
            in1_height, out_dims[0]));

    auto& in1_value = input1.value();
    auto& in1_rows = input1.rows();

    // With no selected rows the per-row width is undefined (it would be a
    // division by zero) and there is nothing for the kernel to scatter.
    const bool has_rows = in1_rows.size() != 0;

    int64_t in1_row_numel = 0;
    if (has_rows) {
      in1_row_numel = in1_value.numel() / in1_rows.size();
      PADDLE_ENFORCE_EQ(
          in1_row_numel, input2.numel() / in1_height,
          platform::errors::InvalidArgument(
              "The two inputs width must be equal. "
              "But received first input width = [%d], "
              "second input width = [%d]",
              in1_row_numel, input2.numel() / in1_height));
      PADDLE_ENFORCE_EQ(
          in1_row_numel, output->numel() / in1_height,
          platform::errors::InvalidArgument(
              "The input and output width must be equal. "
              "But received input width = [%d], output width = [%d]",
              in1_row_numel, output->numel() / in1_height));
    }

    auto* out_data = output->data<T>();

    // Zero the output so the kernel can accumulate the selected rows into it.
    phi::funcs::SetConstant<phi::GPUContext, T> functor;
    functor(context, output, static_cast<T>(0));

    if (has_rows) {
      auto* in1_data = in1_value.data<T>();

      const int block_size = 256;
      dim3 threads(block_size, 1);
      dim3 grid(in1_rows.size(), 1);  // one block per selected row
      paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
      SelectedRowsAddTensorKernel<
          T, block_size><<<grid, threads, 0, context.stream()>>>(
          in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), out_data,
          in1_row_numel);
    }

    // Add the dense operand on top of the scattered rows.
    auto out_eigen = framework::EigenVector<T>::Flatten(*output);
    auto in2_eigen = framework::EigenVector<T>::Flatten(input2);
    out_eigen.device(*context.eigen_device()) = out_eigen + in2_eigen;
  }
};

// Explicit instantiations for platform::CUDADeviceContext.
// NOTE(review): the first float16 line instantiates SelectedRowsAdd (not
// SelectedRowsAddTensor); its definition is outside this view.
template struct SelectedRowsAddTensor<platform::CUDADeviceContext, float>;
template struct SelectedRowsAddTensor<platform::CUDADeviceContext, double>;
template struct SelectedRowsAdd<platform::CUDADeviceContext, platform::float16>;
template struct SelectedRowsAddTensor<platform::CUDADeviceContext,
platform::float16>;

// The same instantiations for phi::GPUContext.
// NOTE(review): SelectedRowsAdd<phi::GPUContext, ...> is instantiated here as
// well; confirm a matching specialization exists elsewhere in the file.
template struct SelectedRowsAddTensor<phi::GPUContext, float>;
template struct SelectedRowsAddTensor<phi::GPUContext, double>;
template struct SelectedRowsAdd<phi::GPUContext, platform::float16>;
template struct SelectedRowsAddTensor<phi::GPUContext, platform::float16>;

template <typename T>
struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
void operator()(const platform::CUDADeviceContext& context,
Expand Down Expand Up @@ -285,12 +350,54 @@ struct SelectedRowsAddToTensor<platform::CUDADeviceContext, T> {
}
};

// GPU (phi::GPUContext) specialization that accumulates a SelectedRows value
// into a dense tensor in place by launching SelectedRowsAddToTensorKernel
// with one block per selected row and 256 threads per block on the context's
// stream.
//
// Params:
//   context - GPU context supplying the stream and the place.
//   input1  - sparse rows to add. When it holds no rows the function returns
//             after the height check without touching input2, matching the
//             empty-input handling of the CPU specialization.
//   input2  - dense tensor that is updated in place.
//
// Raises InvalidArgument (through PADDLE_ENFORCE_EQ) when the height of
// input1 differs from input2's first dimension, or when the per-row element
// counts of the two inputs differ.
template <typename T>
struct SelectedRowsAddToTensor<phi::GPUContext, T> {
  void operator()(const phi::GPUContext& context,
                  const phi::SelectedRows& input1, framework::Tensor* input2) {
    auto in1_height = input1.height();
    auto in2_dims = input2->dims();
    PADDLE_ENFORCE_EQ(
        in1_height, in2_dims[0],
        platform::errors::InvalidArgument(
            "The two inputs height must be equal. "
            "But received first input height = [%d], "
            "second input height = [%d]",
            in1_height, in2_dims[0]));

    auto& in1_value = input1.value();
    auto& in1_rows = input1.rows();

    // Nothing to accumulate. Returning here avoids dividing by zero below and
    // launching a kernel with a zero-sized grid.
    if (in1_rows.size() == 0) {
      return;
    }

    // Number of elements stored per selected row.
    int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
    PADDLE_ENFORCE_EQ(
        in1_row_numel, input2->numel() / in1_height,
        platform::errors::InvalidArgument(
            "The two inputs width must be equal. "
            "But received first input width = [%d], second input width = [%d]",
            in1_row_numel, input2->numel() / in1_height));

    auto* in1_data = in1_value.data<T>();
    auto* in2_data = input2->data<T>();
    const int block_size = 256;
    dim3 threads(block_size, 1);
    dim3 grid(in1_rows.size(), 1);  // one block per selected row
    paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
    SelectedRowsAddToTensorKernel<
        T, block_size><<<grid, threads, 0, context.stream()>>>(
        in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), in2_data,
        in1_row_numel);
  }
};

// Explicit instantiations of SelectedRowsAddToTensor for
// platform::CUDADeviceContext (float, double, int, int64_t and
// platform::float16 element types).
template struct SelectedRowsAddToTensor<platform::CUDADeviceContext, float>;
template struct SelectedRowsAddToTensor<platform::CUDADeviceContext, double>;
template struct SelectedRowsAddToTensor<platform::CUDADeviceContext, int>;
template struct SelectedRowsAddToTensor<platform::CUDADeviceContext, int64_t>;
template struct SelectedRowsAddToTensor<platform::CUDADeviceContext,
platform::float16>;
// The same set of element types instantiated for phi::GPUContext.
template struct SelectedRowsAddToTensor<phi::GPUContext, float>;
template struct SelectedRowsAddToTensor<phi::GPUContext, double>;
template struct SelectedRowsAddToTensor<phi::GPUContext, int>;
template struct SelectedRowsAddToTensor<phi::GPUContext, int64_t>;
template struct SelectedRowsAddToTensor<phi::GPUContext, platform::float16>;

namespace scatter {

Expand Down
31 changes: 3 additions & 28 deletions paddle/fluid/operators/meshgrid_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/meshgrid_op.h"

#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"

namespace paddle {
namespace operators {

Expand Down Expand Up @@ -145,29 +146,3 @@ REGISTER_OPERATOR(meshgrid, ops::MeshgridOp, ops::MeshgridOpMaker,
ops::MeshgridGradOpMaker<paddle::framework::OpDesc>,
ops::MeshgridGradOpMaker<paddle::imperative::OpBase>);
// Registers the "meshgrid_grad" operator with ops::MeshgridGradOp as its
// operator class.
REGISTER_OPERATOR(meshgrid_grad, ops::MeshgridGradOp);
// CPU kernels for the forward "meshgrid" op: float, double, int and int64_t.
// NOTE(review): bool is registered for the CUDA forward kernel but not here.
REGISTER_OP_CPU_KERNEL(
meshgrid, ops::MeshgridKernel<paddle::platform::CPUDeviceContext, float>,
ops::MeshgridKernel<paddle::platform::CPUDeviceContext, double>,
ops::MeshgridKernel<paddle::platform::CPUDeviceContext, int>,
ops::MeshgridKernel<paddle::platform::CPUDeviceContext, int64_t>);

// CPU kernels for the backward "meshgrid_grad" op, covering the same four
// element types as the forward CPU registration.
REGISTER_OP_CPU_KERNEL(
meshgrid_grad,
ops::MeshgridGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::MeshgridGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::MeshgridGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::MeshgridGradKernel<paddle::platform::CPUDeviceContext, double>);
// GPU kernel registrations, compiled only for CUDA or HIP builds.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// Forward "meshgrid" kernels: float, double, int, int64_t and bool.
REGISTER_OP_CUDA_KERNEL(
meshgrid, ops::MeshgridKernel<paddle::platform::CUDADeviceContext, float>,
ops::MeshgridKernel<paddle::platform::CUDADeviceContext, double>,
ops::MeshgridKernel<paddle::platform::CUDADeviceContext, int>,
ops::MeshgridKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::MeshgridKernel<paddle::platform::CUDADeviceContext, bool>);
// Backward "meshgrid_grad" kernels: float, double, int and int64_t (no bool).
REGISTER_OP_CUDA_KERNEL(
meshgrid_grad,
ops::MeshgridGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::MeshgridGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::MeshgridGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::MeshgridGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
#endif
Loading