[NPU] change transpose to transpose2 (PaddlePaddle#31734)
* change transpose to transpose2

* fix bug
zhiqiu authored and frankwhzhang committed Apr 12, 2021
1 parent 07f97a4 commit 3dcd6b5
Showing 3 changed files with 69 additions and 77 deletions.
74 changes: 36 additions & 38 deletions paddle/fluid/operators/transpose_op_npu.cc
@@ -9,75 +9,73 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_ASCEND_CL
#include <iostream>
#include <memory>
#include <string>

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/expand_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"

namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
class TransposeNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<framework::LoDTensor>("X");
    auto* out = ctx.Output<framework::LoDTensor>("Out");
    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
    framework::NPUAttributeMap attr_input = {{"perm", axis}};
    out->mutable_data<T>(ctx.device_context().GetPlace());
    auto runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input);
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
};

template <typename T>
class TransposeGradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* out_grad =
        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
    auto* x_grad =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
    std::vector<int> reversed_axis(axis);
    for (size_t i = 0; i < axis.size(); i++) {
      reversed_axis[axis[i]] = i;
    }

    x_grad->mutable_data<T>(ctx.GetPlace());
    framework::NPUAttributeMap attr_input = {{"perm", reversed_axis}};
    auto runner = NpuOpRunner("TransposeD", {*out_grad}, {*x_grad}, attr_input);
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_NPU_KERNEL(
    transpose2,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, float>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext,
                            paddle::platform::float16>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, int>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, uint8_t>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, int8_t>);

REGISTER_OP_NPU_KERNEL(transpose2_grad, ops::TransposeGradNPUKernel<float>,
                       ops::TransposeGradNPUKernel<paddle::platform::float16>,
                       ops::TransposeGradNPUKernel<int>,
                       ops::TransposeGradNPUKernel<uint8_t>,
                       ops::TransposeGradNPUKernel<int8_t>);

#endif
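A note on the gradient kernel above: instead of calling a separate inverse op, it undoes the forward permutation by inverting "axis" with the loop reversed_axis[axis[i]] = i. Below is a minimal standalone sketch of that arithmetic — not Paddle code, just the same inversion with a hypothetical NCHW-to-NHWC perm as the example input:

// Standalone sketch: invert a transpose permutation the way
// TransposeGradNPUKernel does (reversed_axis[axis[i]] = i).
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> axis = {0, 2, 3, 1};  // forward perm, e.g. NCHW -> NHWC
  std::vector<int> reversed_axis(axis.size());
  for (size_t i = 0; i < axis.size(); ++i) {
    reversed_axis[axis[i]] = static_cast<int>(i);  // inverse perm
  }
  // Prints 0 3 1 2: applying this perm to the forward output restores the
  // original layout, which is exactly what the grad kernel needs.
  for (int v : reversed_axis) {
    printf("%d ", v);
  }
  printf("\n");
  return 0;
}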
70 changes: 32 additions & 38 deletions paddle/fluid/operators/transpose_op_npu_test.cc
@@ -13,12 +13,12 @@ limitations under the License. */
#include <unistd.h>
#endif

#include <cmath>
#include <iostream>
#include <numeric>
#include <string>
#include <thread>  // NOLINT
#include <vector>

#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
@@ -32,17 +32,18 @@ namespace f = paddle::framework;
namespace p = paddle::platform;
namespace m = paddle::operators::math;

USE_OP(transpose2);
USE_OP_DEVICE_KERNEL(transpose2, NPU);

template <typename T>
void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
  // init
  auto x = scope->Var("X");
  auto out = scope->Var("Out");
  auto xshape = scope->Var("XShape");
  auto* x_t = x->GetMutable<f::LoDTensor>();
  auto* out_t = out->GetMutable<f::LoDTensor>();
  auto* xshape_t = xshape->GetMutable<f::LoDTensor>();
  auto place = ctx.GetPlace();

  int dim0 = 2;
@@ -54,12 +55,13 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
  ctx.Wait();
  out_t->mutable_data<T>(place);
  ctx.Wait();
  xshape_t->Resize({dim0, dim1});
  xshape_t->mutable_data<T>(place);
  f::AttributeMap attrs = {{"axis", std::vector<int>({1, 0})},
                           {"data_format", std::string("AnyLayout")}};
  auto op = f::OpRegistry::CreateOp("transpose2", {{"X", {"X"}}},
                                    {{"Out", {"Out"}}, {"XShape", {"XShape"}}},
                                    attrs);
  ctx.Wait();
  op->Run(*scope, place);
  ctx.Wait();
@@ -76,47 +78,42 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
  EXPECT_EQ(out_v[5], 5);
}

template <typename T>
void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
  // init
  auto xshape = scope->Var("XShape");
  auto x_grad = scope->Var("X@GRAD");
  auto out_grad = scope->Var("Out@GRAD");

  auto* x_grad_t = x_grad->GetMutable<f::LoDTensor>();
  auto* xshape_t = xshape->GetMutable<f::LoDTensor>();
  auto* out_grad_t = out_grad->GetMutable<f::LoDTensor>();

  int dim0 = 2;
  int dim1 = 3;
  auto place = ctx.GetPlace();

  TensorFromVector(std::vector<T>({0, 1, 2, 3, 4, 5}), ctx, out_grad_t);
  ctx.Wait();

  x_grad_t->Resize({dim0, dim1});
  xshape_t->Resize(
      {0, dim0,
       dim1});  // NOTE(zhiqiu): 0 is needed, see its infershape function
  out_grad_t->Resize({dim0, dim1});

  x_grad_t->mutable_data<T>(place);
  ctx.Wait();
  f::AttributeMap attrs = {{"axis", std::vector<int>({1, 0})},
                           {"data_format", std::string("AnyLayout")}};

  auto op = f::OpRegistry::CreateOp(
      "transpose2_grad", {{"Out@GRAD", {"Out@GRAD"}}, {"XShape", {"XShape"}}},
      {{"X@GRAD", {"X@GRAD"}}}, attrs);

  op->Run(*scope, place);
  ctx.Wait();
  std::vector<T> out_v;
  TensorToVector(*x_grad_t, ctx, &out_v);
  ctx.Wait();

  EXPECT_EQ(x_grad_t->numel(), dim0 * dim1);
  EXPECT_EQ(out_v[0], 0);
@@ -125,19 +122,16 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
  EXPECT_EQ(out_v[3], 4);
  EXPECT_EQ(out_v[4], 2);
  EXPECT_EQ(out_v[5], 5);
}

TEST(transpose2, NPU_fp32) {
  f::Scope scope;
  p::NPUDeviceContext ctx(p::NPUPlace(0));
  Compare<float>(&scope, ctx);
}

TEST(transpose2_grad, NPU_fp32) {
  f::Scope scope;
  p::NPUDeviceContext ctx(p::NPUPlace(0));
  CompareGrad<float>(&scope, ctx);
}
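The xshape_t->Resize({0, dim0, dim1}) call in CompareGrad mirrors the transpose2 convention that XShape records the forward input's dims behind a leading 0, so the grad op can recover the input shape without keeping X alive. A minimal sketch of that recovery follows — DimsFromXShape is a hypothetical helper illustrating the convention noted in the test, not the actual Paddle infershape code:

// Sketch: recover the forward input dims from a transpose2-style XShape
// by dropping the leading 0 (see the NOTE(zhiqiu) comment in the test).
#include <cstdio>
#include <vector>

std::vector<int> DimsFromXShape(const std::vector<int>& xshape_dims) {
  return std::vector<int>(xshape_dims.begin() + 1, xshape_dims.end());
}

int main() {
  std::vector<int> xshape = {0, 2, 3};  // as set in CompareGrad above
  std::vector<int> x_dims = DimsFromXShape(xshape);
  printf("%d x %d\n", x_dims[0], x_dims[1]);  // prints 2 x 3
  return 0;
}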

@@ -30,7 +30,7 @@
class TestTransposeOp(OpTest):
    def setUp(self):
        self.set_npu()
        self.op_type = "transpose2"
        self.place = paddle.NPUPlace(0)
        self.init_dtype()
        self.init_input_output()
