From 1c81d57938c55001c58336f29ed07ea4f1247cb9 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Sat, 9 Sep 2017 19:01:24 +0800 Subject: [PATCH 1/3] Add huber loss operator. --- paddle/operators/huber_loss_op.cc | 108 ++++++++++++++++ paddle/operators/huber_loss_op.cu | 23 ++++ paddle/operators/huber_loss_op.h | 120 ++++++++++++++++++ paddle/pybind/pybind.cc | 1 + .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/test_huber_loss_op.py | 56 ++++++++ 6 files changed, 309 insertions(+) create mode 100644 paddle/operators/huber_loss_op.cc create mode 100644 paddle/operators/huber_loss_op.cu create mode 100644 paddle/operators/huber_loss_op.h create mode 100644 python/paddle/v2/framework/tests/test_huber_loss_op.py diff --git a/paddle/operators/huber_loss_op.cc b/paddle/operators/huber_loss_op.cc new file mode 100644 index 0000000000000..461409b032377 --- /dev/null +++ b/paddle/operators/huber_loss_op.cc @@ -0,0 +1,108 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/huber_loss_op.h" + +namespace paddle { +namespace operators { + +class HuberLossOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "X must be initialized."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Y must be initialized."); + + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + + PADDLE_ENFORCE_EQ(x->dims(), y->dims(), + "Dimensions of X and Y must be the same."); + // we constraint shape of X to (N, 1), may expand to (N, x, ...) if needed + PADDLE_ENFORCE_EQ(framework::arity(x->dims()), 2, + "Tensor rank of X must be 2."); + PADDLE_ENFORCE_EQ(x->dims()[1], 1, "Second dimension of X must be 1."); + + ctx.Output("residual")->Resize(x->dims()); + ctx.Output("Out")->Resize({x->dims()[0], 1}); + } +}; + +template +class HuberLossOpMaker : public framework::OpProtoAndCheckerMaker { + public: + HuberLossOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input value of HuberLossOp."); + AddInput("Y", "Target value of HuberLossOp."); + AddOutput("residual", + "Save residual value between Y and X. " + "Will be reused in backward.") + .AsIntermediate(); + AddOutput("Out", "Huber loss between input and target."); + AddAttr("delta", "Hyper parameter in huber loss."); + AddComment(R"DOC( +Huber loss is a loss function used in robust regression. We constrain shape of +input to (N, 1). The formulation is: + +L_delta(y, f(x)) = 0.5 * (y - f(x))^2 for |y - f(x)| <= delta, + delta * (|y - f(x)| - 0.5 * delta) otherwise. + +)DOC"); + } +}; + +class HuberLossGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* residual = ctx.Input("residual"); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); + auto* x_grad = ctx.Output(framework::GradVarName("X")); + auto* y_grad = ctx.Output(framework::GradVarName("Y")); + + PADDLE_ENFORCE_NOT_NULL(x, "Input X must not be null."); + PADDLE_ENFORCE_NOT_NULL(y, "Target Y must not be null."); + PADDLE_ENFORCE_NOT_NULL(residual, "Residual value must not be null."); + PADDLE_ENFORCE_NOT_NULL(out_grad, "Out gradient must not be null."); + + PADDLE_ENFORCE_EQ(residual->dims(), x->dims(), + "Dimension of X and residual value must be the same."); + PADDLE_ENFORCE_EQ( + out_grad->dims(), x->dims(), + "Dimension of Out gradient and X must be the same (N*1)."); + + if (x_grad) x_grad->Resize(x->dims()); + if (y_grad) y_grad->Resize(y->dims()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(huber_loss, ops::HuberLossOp, ops::HuberLossOpMaker, + huber_loss_grad, ops::HuberLossGradOp); +REGISTER_OP_CPU_KERNEL(huber_loss, + ops::HuberLossKernel); +REGISTER_OP_CPU_KERNEL( + huber_loss_grad, + ops::HuberLossGradKernel); diff --git a/paddle/operators/huber_loss_op.cu b/paddle/operators/huber_loss_op.cu new file mode 100644 index 0000000000000..317321dc6c495 --- /dev/null +++ b/paddle/operators/huber_loss_op.cu @@ -0,0 +1,23 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/huber_loss_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(huber_loss, + ops::HuberLossKernel); +REGISTER_OP_GPU_KERNEL( + huber_loss_grad, + ops::HuberLossGradKernel); diff --git a/paddle/operators/huber_loss_op.h b/paddle/operators/huber_loss_op.h new file mode 100644 index 0000000000000..61c64ea357234 --- /dev/null +++ b/paddle/operators/huber_loss_op.h @@ -0,0 +1,120 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" +#include "paddle/platform/hostdevice.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + +template +struct HuberLossForward { + HOSTDEVICE HuberLossForward(const T& delta) : delta(delta) {} + + HOSTDEVICE T operator()(const T& val) const { + T abs_val = std::abs(val); + if (abs_val <= delta) { + return 0.5 * val * val; + } else { + return delta * (abs_val - 0.5 * delta); + } + } + + T delta; +}; + +template +class HuberLossKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in0 = context.Input("X"); + auto* in1 = context.Input("Y"); + auto* out0 = context.Output("residual"); + auto* out1 = context.Output("Out"); + auto delta = static_cast(context.op().Attr("delta")); + auto place = context.GetEigenDevice(); + + auto x = EigenVector::Flatten(*in0); + auto y = EigenVector::Flatten(*in1); + out0->mutable_data(context.GetPlace()); + auto residual = EigenVector::Flatten(*out0); + residual.device(place) = y - x; + out1->mutable_data(context.GetPlace()); + auto loss = EigenVector::Flatten(*out1); + loss.device(place) = residual.unaryExpr(HuberLossForward(delta)); + } +}; + +template +struct HuberLossBackward { + HOSTDEVICE HuberLossBackward(const T& delta, bool is_x) + : is_x(is_x), delta(delta) {} + + HOSTDEVICE T operator()(const T& val) const { + T sign = is_x ? -1.0 : 1.0; + T abs_val = std::abs(val); + if (abs_val <= delta) { + return sign * val; + } else { + if (val > 0) { + return sign * delta; + } else { + return -1 * sign * delta; + } + } + } + + bool is_x; + T delta; +}; + +template +class HuberLossGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in0 = context.Input("residual"); + auto* in1 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + auto* out1 = context.Output(framework::GradVarName("Y")); + auto delta = static_cast(context.op().Attr("delta")); + auto place = context.GetEigenDevice(); + + auto residual = EigenVector::Flatten(*in0); + auto out_grad = EigenVector::Flatten(*in1); + + if (out0) { + out0->mutable_data(context.GetPlace()); + auto x_grad = EigenVector::Flatten(*out0); + x_grad.device(place) = + out_grad * residual.unaryExpr(HuberLossBackward(delta, true)); + } + + if (out1) { + out1->mutable_data(context.GetPlace()); + auto y_grad = EigenVector::Flatten(*out1); + y_grad.device(place) = + out_grad * residual.unaryExpr(HuberLossBackward(delta, false)); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 53985933ed143..130cf140aa8b4 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -51,6 +51,7 @@ USE_CPU_ONLY_OP(gather); USE_CPU_ONLY_OP(scatter); USE_OP(top_k); USE_OP(squared_l2_distance); +USE_OP(huber_loss); namespace paddle { namespace framework { diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index ef910f939be0b..5b9f4084ec077 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -35,3 +35,4 @@ py_test(test_lookup_table SRCS test_lookup_table.py) py_test(test_scale_and_identity_op SRCS test_scale_and_identity_op.py) py_test(mnist SRCS mnist.py) py_test(test_squared_l2_distance_op SRCS test_squared_l2_distance_op.py) +py_test(test_huber_loss_op SRCS test_huber_loss_op.py) diff --git a/python/paddle/v2/framework/tests/test_huber_loss_op.py b/python/paddle/v2/framework/tests/test_huber_loss_op.py new file mode 100644 index 0000000000000..540dedc35778f --- /dev/null +++ b/python/paddle/v2/framework/tests/test_huber_loss_op.py @@ -0,0 +1,56 @@ +import unittest +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op +from paddle.v2.framework.op import Operator +import numpy as np + + +def huber_loss_forward(val, delta): + abs_val = abs(val) + if abs_val <= delta: + return 0.5 * val * val + else: + return delta * (abs_val - 0.5 * delta) + + +class TestHuberLossOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = 'huber_loss' + samples_num = 64 + delta = 1.0 + self.inputs = { + 'X': np.random.uniform(0, 1., (samples_num, 1)).astype('float32'), + 'Y': np.random.uniform(0, 1., (samples_num, 1)).astype('float32'), + } + residual = self.inputs['Y'] - self.inputs['X'] + loss = np.vectorize(huber_loss_forward)(residual, delta) + self.attrs = {'delta': delta} + self.outputs = { + 'residual': residual, + 'Out': loss.reshape((samples_num, 1)) + } + + +class TestHuberLossGradOp(GradientChecker): + def test_huber_loss(self): + samples_num = 10 + delta = 1.0 + inputs = { + 'X': np.random.uniform(-1, 1, (samples_num, 1)).astype('float32'), + 'Y': np.random.uniform(-1, 1, (samples_num, 1)).astype('float32') + } + op = Operator( + "huber_loss", + X='X', + Y='Y', + residual='residual', + delta=delta, + Out='Out') + self.compare_grad(op, inputs, no_grad_set=set(['residual'])) + self.check_grad(op, inputs, set(["X", "Y"]), "Out") + + +if __name__ == '__main__': + unittest.main() From 5939a17c47246addb76d5273146ec38b6db19130 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 24 Oct 2017 20:51:20 +0800 Subject: [PATCH 2/3] Follow comments and adapt to new interface. --- paddle/operators/huber_loss_op.cc | 67 ++++++++++--------- paddle/operators/huber_loss_op.h | 17 +++-- .../v2/framework/tests/test_huber_loss_op.py | 6 +- 3 files changed, 47 insertions(+), 43 deletions(-) diff --git a/paddle/operators/huber_loss_op.cc b/paddle/operators/huber_loss_op.cc index 8c2ca86ccce09..2d9449f5ca50d 100644 --- a/paddle/operators/huber_loss_op.cc +++ b/paddle/operators/huber_loss_op.cc @@ -21,24 +21,24 @@ class HuberLossOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - protected: - void InferShape(const framework::InferShapeContext& ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must be initialized."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) must be initialized."); + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must be initialized."); + PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) must be initialized."); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); + auto x_dims = ctx->GetInputDim("X"); + auto y_dims = ctx->GetInputDim("Y"); - PADDLE_ENFORCE_EQ(x->dims(), y->dims()); - PADDLE_ENFORCE_EQ(framework::arity(x->dims()), 2, + PADDLE_ENFORCE_EQ(x_dims, y_dims); + PADDLE_ENFORCE_EQ(x_dims.size(), 2, "The rank of Input(X) must be 2 and the shape is " "[batch_size, 1]."); - PADDLE_ENFORCE_EQ(x->dims()[1], 1, + PADDLE_ENFORCE_EQ(x_dims[1], 1, "Each row of Input(X) contains a real value, " "so the 2nd dimension of Input(X) must be 1."); - ctx.Output("Residual")->Resize(x->dims()); - ctx.Output("Out")->Resize({x->dims()[0], 1}); + ctx->SetOutputDim("Residual", x_dims); + ctx->SetOutputDim("Out", {x_dims[0], 1}); + ctx->ShareLoD("X", "Out"); } }; @@ -55,7 +55,7 @@ class HuberLossOpMaker : public framework::OpProtoAndCheckerMaker { "The target value of huber loss op." "Y is a 2-D tensor with shape [batch_size, 1]."); AddOutput("Residual", - "Intermediate tensor to cache residual value of Y and X." + "Intermediate tensor to cache residual value between Y and X." "The shape is same as Input(X) and will be reused in backward.") .AsIntermediate(); AddOutput("Out", @@ -82,25 +82,30 @@ class HuberLossGradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - protected: - void InferShape(const framework::InferShapeContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* residual = ctx.Input("Residual"); - auto* out_grad = ctx.Input(framework::GradVarName("Out")); - auto* x_grad = ctx.Output(framework::GradVarName("X")); - auto* y_grad = ctx.Output(framework::GradVarName("Y")); - - PADDLE_ENFORCE_NOT_NULL(x, "Input(X) should not be null."); - PADDLE_ENFORCE_NOT_NULL(y, "Input(Y) should not be null."); - PADDLE_ENFORCE_NOT_NULL(residual, "Input(Residual) should not be null."); - PADDLE_ENFORCE_NOT_NULL(out_grad, "Input(Out@GRAD) should not be null."); - - PADDLE_ENFORCE_EQ(residual->dims(), x->dims()); - PADDLE_ENFORCE_EQ(out_grad->dims(), x->dims()); - - if (x_grad) x_grad->Resize(x->dims()); - if (y_grad) y_grad->Resize(y->dims()); + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Residual"), + "Input(Residual) should not be null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) should not be null."); + + auto x_dims = ctx->GetInputDim("X"); + auto y_dims = ctx->GetInputDim("Y"); + auto residual_dims = ctx->GetInputDim("Residual"); + auto out_grad_dims = ctx->GetInputDim(framework::GradVarName("Out")); + + PADDLE_ENFORCE_EQ(residual_dims, x_dims); + PADDLE_ENFORCE_EQ(out_grad_dims, x_dims); + + auto x_grad_name = framework::GradVarName("X"); + auto y_grad_name = framework::GradVarName("Y"); + if (ctx->HasOutput(x_grad_name)) { + ctx->SetOutputDim(x_grad_name, x_dims); + } + if (ctx->HasOutput(y_grad_name)) { + ctx->SetOutputDim(y_grad_name, y_dims); + } } }; diff --git a/paddle/operators/huber_loss_op.h b/paddle/operators/huber_loss_op.h index 6913141bde38e..d8a2da52f508c 100644 --- a/paddle/operators/huber_loss_op.h +++ b/paddle/operators/huber_loss_op.h @@ -42,14 +42,14 @@ struct HuberLossForward { }; template -class HuberLossKernel : public framework::OpKernel { +class HuberLossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in0 = context.Input("X"); auto* in1 = context.Input("Y"); auto* out0 = context.Output("Residual"); auto* out1 = context.Output("Out"); - auto delta = static_cast(context.op().Attr("delta")); + auto delta = static_cast(context.Attr("delta")); auto place = context.GetEigenDevice(); auto x = EigenVector::Flatten(*in0); @@ -65,11 +65,10 @@ class HuberLossKernel : public framework::OpKernel { template struct HuberLossBackward { - HOSTDEVICE HuberLossBackward(const T& delta, bool is_x) - : is_x(is_x), delta(delta) {} + HOSTDEVICE HuberLossBackward(const T& delta, T sign) + : sign(sign), delta(delta) {} HOSTDEVICE T operator()(const T& val) const { - T sign = is_x ? -1.0 : 1.0; T abs_val = std::abs(val); if (abs_val <= delta) { return sign * val; @@ -82,12 +81,12 @@ struct HuberLossBackward { } } - bool is_x; + T sign; T delta; }; template -class HuberLossGradKernel : public framework::OpKernel { +class HuberLossGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in0 = context.Input("Residual"); @@ -104,14 +103,14 @@ class HuberLossGradKernel : public framework::OpKernel { out0->mutable_data(context.GetPlace()); auto x_grad = EigenVector::Flatten(*out0); x_grad.device(place) = - out_grad * residual.unaryExpr(HuberLossBackward(delta, true)); + out_grad * residual.unaryExpr(HuberLossBackward(delta, -1.0)); } if (out1) { out1->mutable_data(context.GetPlace()); auto y_grad = EigenVector::Flatten(*out1); y_grad.device(place) = - out_grad * residual.unaryExpr(HuberLossBackward(delta, false)); + out_grad * residual.unaryExpr(HuberLossBackward(delta, 1.0)); } } }; diff --git a/python/paddle/v2/framework/tests/test_huber_loss_op.py b/python/paddle/v2/framework/tests/test_huber_loss_op.py index ff0a17c184d1b..b2f102d4fc02a 100644 --- a/python/paddle/v2/framework/tests/test_huber_loss_op.py +++ b/python/paddle/v2/framework/tests/test_huber_loss_op.py @@ -32,15 +32,15 @@ def test_check_output(self): self.check_output() def test_check_grad_normal(self): - self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.05) + self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.008) def test_check_grad_ingore_x(self): self.check_grad( - ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("residual")) + ['Y'], 'Out', max_relative_error=0.008, no_grad_set=set("residual")) def test_check_grad_ingore_y(self): self.check_grad( - ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('residual')) + ['X'], 'Out', max_relative_error=0.008, no_grad_set=set('residual')) if __name__ == '__main__': From ac5f42184f56029631a29e1c62b1b527c4cd0bfc Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 27 Oct 2017 11:54:50 +0800 Subject: [PATCH 3/3] Using static_cast to make more robust. --- paddle/operators/huber_loss_op.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/operators/huber_loss_op.h b/paddle/operators/huber_loss_op.h index d8a2da52f508c..4e7bc5543226e 100644 --- a/paddle/operators/huber_loss_op.h +++ b/paddle/operators/huber_loss_op.h @@ -32,9 +32,9 @@ struct HuberLossForward { HOSTDEVICE T operator()(const T& val) const { T abs_val = std::abs(val); if (abs_val <= delta) { - return 0.5 * val * val; + return static_cast(0.5) * val * val; } else { - return delta * (abs_val - 0.5 * delta); + return delta * (abs_val - static_cast(0.5) * delta); } }