Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【2.0 API】Enhance affine grid operator #26385

Merged
merged 7 commits into from
Aug 25, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions paddle/fluid/operators/affine_grid_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,15 @@ using Tensor = framework::Tensor;

template <typename T>
struct Linspace<paddle::platform::CPUDeviceContext, T> {
void operator()(T start, T end, int count, framework::Tensor* numbers,
void operator()(T start, T end, int count, bool align_corners,
framework::Tensor* numbers,
const framework::ExecutionContext& ctx) {
T* number_data = numbers->mutable_data<T>({count}, platform::CPUPlace());
T slice = (end - start) / (T)(count - 1);
if (!align_corners) {
slice = (end - start) / (T)count;
start *= (T)(count - 1) / (T)count;
}
for (int i = 0; i < count; ++i) {
number_data[i] = start + (T)i * slice;
}
Expand Down Expand Up @@ -130,6 +135,10 @@ class AffineGridOpMaker : public framework::OpProtoAndCheckerMaker {
"use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn")
.SetDefault(true);
AddAttr<bool>("align_corners",
"(bool, default false) Whether to align the corners of input"
"and ouput.")
.SetDefault(true);
AddAttr<std::vector<int>>(
"output_shape",
"The target output image shape with format [N, C, H, W].")
Expand Down Expand Up @@ -164,10 +173,12 @@ class AffineGridOpMaker : public framework::OpProtoAndCheckerMaker {
[-1. -0.5 0. 0.5 1. ]
[-1. -0.5 0. 0.5 1. ]
[-1. -0.5 0. 0.5 1. ]]]
C[0] is the coordinates in height axis and C[1] is the coordinates in width axis.
C[0] is the coordinates in height axis and C[1] is the coordinates in
width axis.

Step2:
Tanspose and reshape C to shape [H * W, 2] and append ones to last dimension. The we get:
Tanspose and reshape C to shape [H * W, 2] and append ones to last
dimension. The we get:
C_ = [[-1. -1. 1. ]
[-0.5 -1. 1. ]
[ 0. -1. 1. ]
Expand Down
58 changes: 58 additions & 0 deletions paddle/fluid/operators/affine_grid_op.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/* Copyright (c) 2010 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/affine_grid_op.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename T>
__global__ void LinspaceKernel(T start, T step, int64_t size, T* out) {
CUDA_KERNEL_LOOP(index, size) { out[index] = start + step * index; }
}

template <typename T>
struct Linspace<paddle::platform::CUDADeviceContext, T> {
void operator()(T start, T end, int count, bool align_corners,
framework::Tensor* numbers,
const framework::ExecutionContext& ctx) {
T* number_data = numbers->mutable_data<T>({count}, ctx.GetPlace());
T slice = (end - start) / (T)(count - 1);
if (!align_corners) {
slice = (end - start) / (T)count;
start *= (T)(count - 1) / (T)count;
}
auto stream = ctx.cuda_device_context().stream();
int block = 512;
int grid = (count + block - 1) / block;
LinspaceKernel<T><<<grid, block, 0, stream>>>(start, slice, count,
number_data);
}
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
affine_grid,
ops::AffineGridOpKernel<paddle::platform::CUDADeviceContext, float>,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so the cuda version mostly reused the eigen implementation? some preliminary benchmark would be nice if time permits.

Copy link
Contributor Author

@wanghaoshuang wanghaoshuang Aug 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added CUDA kernel.
I will give some performance benchmark on CUDNN version and CUDA kernel version in the future.
Currently, it will use CUDNN kernel when align_corners is true and use CUDA kernel when align_corners is false.

ops::AffineGridOpKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
affine_grid_grad,
ops::AffineGridGradOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::AffineGridGradOpKernel<paddle::platform::CUDADeviceContext, double>);
22 changes: 13 additions & 9 deletions paddle/fluid/operators/affine_grid_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,29 +37,33 @@ using Array4 = Eigen::DSizes<int64_t, 4>;
*/
template <typename DeviceContext, typename T>
struct Linspace {
void operator()(T start, T end, int count, framework::Tensor* numbers,
void operator()(T start, T end, int count, bool align_corners,
framework::Tensor* numbers,
const framework::ExecutionContext& ctx);
};

template <typename DeviceContext, typename T>
inline void GetIdxMap(int n, int h, int w, Tensor* grid,
inline void GetIdxMap(int n, int h, int w, bool align_corners, Tensor* grid,
const framework::ExecutionContext& ctx) {
auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
grid->mutable_data<T>({n, h, w, 3}, ctx.GetPlace());
auto grid_t = EigenTensor<T, 4>::From(*grid);
// Get indexes of height with shape [height, width, 1]
Tensor h_idx;
Linspace<DeviceContext, T> linspace;
linspace((T)-1, (T)1, h, &h_idx, ctx);
linspace((T)-1, (T)1, h, align_corners, &h_idx, ctx);
willthefrog marked this conversation as resolved.
Show resolved Hide resolved
auto h_idx_t = EigenTensor<T, 1>::From(h_idx);
// Get indexes of width with shape [height, width, 1]
Tensor w_idx;
linspace((T)-1, (T)1, w, &w_idx, ctx);
linspace((T)-1, (T)1, w, align_corners, &w_idx, ctx);
auto w_idx_t = EigenTensor<T, 1>::From(w_idx);
// Get constant ones tensor with shape [height, width, 1]
Tensor ones;
ones.mutable_data<T>({h, w, 1}, ctx.GetPlace());
auto ones_t = EigenTensor<T, 3>::From(ones).setConstant((T)1);

math::SetConstant<DeviceContext, T>()(
ctx.template device_context<DeviceContext>(), &ones, static_cast<T>(1));
auto ones_t = EigenTensor<T, 3>::From(ones);
// Get grid tensor with shape [n, h, w, 3] by concatenating h_idx, w_idx and
// ones
Tensor w_idx_map;
Expand All @@ -74,11 +78,9 @@ inline void GetIdxMap(int n, int h, int w, Tensor* grid,
Tensor w_h_one_idx_map;
w_h_one_idx_map.mutable_data<T>({h, w, 3}, ctx.GetPlace());
auto w_h_one_idx_map_t = EigenTensor<T, 3>::From(w_h_one_idx_map);

w_idx_map_t.device(place) = w_idx_t.reshape(Array2(1, w))
.broadcast(Array2(h, 1))
.reshape(Array3(h, w, 1));

h_idx_map_t.device(place) = h_idx_t.reshape(Array2(1, h))
.broadcast(Array2(w, 1))
.shuffle(Array2(1, 0))
Expand All @@ -97,6 +99,7 @@ class AffineGridOpKernel : public framework::OpKernel<T> {
auto* theta = ctx.Input<Tensor>("Theta");
int n = theta->dims()[0];
auto size_attr = ctx.Attr<std::vector<int>>("output_shape");
auto align_corners = ctx.Attr<bool>("align_corners");
int h = 0;
int w = 0;
if (size_attr.size() == 0) {
Expand All @@ -116,7 +119,7 @@ class AffineGridOpKernel : public framework::OpKernel<T> {
ctx.template device_context<DeviceContext>(), output,
static_cast<T>(0));
Tensor grid;
GetIdxMap<DeviceContext, T>(n, h, w, &grid, ctx);
GetIdxMap<DeviceContext, T>(n, h, w, align_corners, &grid, ctx);
// output = grid * theta.T
// TODO(wanghaoshuang): Refine batched matrix multiply
auto blas = math::GetBlas<DeviceContext, T>(ctx);
Expand All @@ -140,6 +143,7 @@ class AffineGridGradOpKernel : public framework::OpKernel<T> {
auto theta_grad = ctx.Output<Tensor>(framework::GradVarName("Theta"));
int n = output_grad->dims()[0];
auto size_attr = ctx.Attr<std::vector<int>>("output_shape");
auto align_corners = ctx.Attr<bool>("align_corners");
int h = 0;
int w = 0;
if (size_attr.size() == 0) {
Expand All @@ -158,7 +162,7 @@ class AffineGridGradOpKernel : public framework::OpKernel<T> {
ctx.template device_context<DeviceContext>(), theta_grad,
static_cast<T>(0));
Tensor grid;
GetIdxMap<DeviceContext, T>(n, h, w, &grid, ctx);
GetIdxMap<DeviceContext, T>(n, h, w, align_corners, &grid, ctx);
// output = grid * theta.T
// TODO(wanghaoshuang): Refine batched matrix multiply
auto blas = math::GetBlas<DeviceContext, T>(ctx);
Expand Down
4 changes: 0 additions & 4 deletions python/paddle/fluid/layers/nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -9083,10 +9083,6 @@ def _attr_offsets_check(offset_val):

def affine_grid(theta, out_shape, name=None):
"""
:alias_main: paddle.nn.functional.affine_grid
:alias: paddle.nn.functional.affine_grid,paddle.nn.functional.vision.affine_grid
:old_api: paddle.fluid.layers.affine_grid

It generates a grid of (x,y) coordinates using the parameters of
the affine transformation that correspond to a set of points where
the input feature map should be sampled to produce the transformed
Expand Down
134 changes: 134 additions & 0 deletions python/paddle/fluid/tests/unittests/test_affine_grid_function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from paddle import fluid, nn
import paddle.fluid.dygraph as dg
import paddle.nn.functional as F
import paddle.fluid.initializer as I
import unittest


class AffineGridTestCase(unittest.TestCase):
def __init__(self,
methodName='runTest',
theta_shape=(20, 2, 3),
output_shape=[20, 2, 5, 7],
align_corners=True,
dtype="float32"):
super(AffineGridTestCase, self).__init__(methodName)

self.theta_shape = theta_shape
self.output_shape = output_shape
self.align_corners = align_corners
self.dtype = dtype

def setUp(self):
self.theta = np.random.randn(*(self.theta_shape)).astype(self.dtype)

def fluid_layer(self, place):
# align_corners = True
main = fluid.Program()
start = fluid.Program()
with fluid.unique_name.guard():
with fluid.program_guard(main, start):
theta_var = fluid.data(
"input", self.theta_shape, dtype=self.dtype)
y_var = fluid.layers.affine_grid(theta_var, self.output_shape)
feed_dict = {"input": self.theta}
exe = fluid.Executor(place)
exe.run(start)
y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var])
return y_np

def functional(self, place):
main = fluid.Program()
start = fluid.Program()
with fluid.unique_name.guard():
with fluid.program_guard(main, start):
theta_var = fluid.data(
"input", self.theta_shape, dtype=self.dtype)
y_var = F.affine_grid(
theta_var,
self.output_shape,
align_corners=self.align_corners)
feed_dict = {"input": self.theta}
exe = fluid.Executor(place)
exe.run(start)
y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var])
return y_np

def paddle_dygraph_layer(self):
theta_var = dg.to_variable(self.theta)
y_var = F.affine_grid(
theta_var, self.output_shape, align_corners=self.align_corners)
y_np = y_var.numpy()
return y_np

def _test_equivalence(self, place):
place = fluid.CPUPlace()
result1 = self.fluid_layer(place)
result2 = self.functional(place)
with dg.guard(place):
result3 = self.paddle_dygraph_layer()
if self.align_corners:
np.testing.assert_array_almost_equal(result1, result2)
np.testing.assert_array_almost_equal(result2, result3)

def runTest(self):
place = fluid.CPUPlace()
self._test_equivalence(place)

if fluid.core.is_compiled_with_cuda():
place = fluid.CUDAPlace(0)
self._test_equivalence(place)


class AffineGridErrorTestCase(AffineGridTestCase):
def runTest(self):
place = fluid.CPUPlace()
with dg.guard(place):
with self.assertRaises(ValueError):
self.paddle_dygraph_layer()


def add_cases(suite):
suite.addTest(AffineGridTestCase(methodName='runTest'))
suite.addTest(AffineGridTestCase(methodName='runTest', align_corners=True))

suite.addTest(AffineGridTestCase(methodName='runTest', align_corners=False))

suite.addTest(
AffineGridTestCase(
methodName='runTest',
theta_shape=(20, 2, 3),
output_shape=[20, 1, 7, 7],
align_corners=True))


def add_error_cases(suite):
suite.addTest(
AffineGridErrorTestCase(
methodName='runTest', output_shape="not_valid"))


def load_tests(loader, standard_tests, pattern):
suite = unittest.TestSuite()
add_cases(suite)
add_error_cases(suite)
return suite


if __name__ == '__main__':
unittest.main()
Loading