add gradient test framework (#3226)
* init grad op checker

* can run

* add GradientChecker class

* use get_numeric_gradient

* refine code

* add softmax and cross entropy auto grad test

* use close to judge op_grad and numeric_grad

* add cpu and gpu compare

* add comments

* add support_gpu

* fix allclose

* fix name error and simplify code

* optimize gradient checker

* add test_cross_entropy_op

* update gradient_checker.py

* optimize code

* use random.uniform instead of random.random

* fix type bug

* optimize check_grad

* put SupportGPU into OperatorBase

* typo
jacquesqiao authored Aug 8, 2017
1 parent 6540701 commit e31a469
Showing 10 changed files with 214 additions and 76 deletions.
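In outline, an operator test now subclasses the new GradientChecker and calls check_grad, which builds the backward op, runs it on CPU (and on GPU when PaddlePaddle is compiled with GPU support and the op's kernels support it), and compares the analytic gradient against the finite-difference estimate from get_numeric_gradient. A minimal sketch of the intended usage (the op type "some_op" and its "X"/"Out" variable names are hypothetical placeholders; the concrete test added in this commit is test_cross_entropy_op.py below):

import unittest

import numpy
from gradient_checker import GradientChecker, create_op


class SomeOpGradTest(GradientChecker):
    def test_grad(self):
        # build the forward op from its registered input/output names
        op = create_op("some_op")  # "some_op" is a placeholder op type
        inputs = {
            "X": numpy.random.uniform(0.1, 1.0, [10, 4]).astype("float32")
        }
        # compare the backward op's gradient of "X" with a numeric gradient
        # of the output "Out", on every available place
        self.check_grad(op, inputs, set(["X"]), "Out")


if __name__ == "__main__":
    unittest.main()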
6 changes: 0 additions & 6 deletions paddle/framework/op_registry.h
@@ -260,12 +260,6 @@ class OpRegistry {
return CreateOp(op_desc.type(), inputs, outputs, attrs);
}

static bool SupportGPU(const std::string& op_type) {
OperatorWithKernel::OpKernelKey key;
key.place_ = platform::GPUPlace();
return OperatorWithKernel::AllOpKernels().at(op_type).count(key) != 0;
}

static std::shared_ptr<OperatorBase> CreateGradOp(const OperatorBase& op) {
PADDLE_ENFORCE(!op.IsNetOp(),
"Use framework::Backward to get backward ops");
10 changes: 9 additions & 1 deletion paddle/framework/operator.h
@@ -88,6 +88,8 @@ class OperatorBase {

virtual bool IsNetOp() const { return false; }

virtual bool SupportGPU() const { return false; }

/// rename inputs outputs name
void Rename(const std::string& old_name, const std::string& new_name);

@@ -308,7 +310,7 @@ class OperatorWithKernel : public OperatorBase {
using OpKernelMap =
std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>;

void InferShape(const Scope& scope) const {
void InferShape(const Scope& scope) const override {
InferShape(InferShapeContext(this, scope));
}

@@ -324,6 +326,12 @@ class OperatorWithKernel : public OperatorBase {
return g_all_op_kernels;
}

bool SupportGPU() const override {
OperatorWithKernel::OpKernelKey key;
key.place_ = platform::GPUPlace();
return OperatorWithKernel::AllOpKernels().at(type_).count(key) != 0;
}

protected:
virtual void InferShape(const InferShapeContext& ctx) const = 0;
};
22 changes: 20 additions & 2 deletions paddle/framework/pybind.cc
@@ -57,6 +57,26 @@ void ExposeOperator(ClassType &m) {
[](const typename ClassType::type &op) -> std::vector<std::string> {
return op.outputs_;
})
.def("inputs",
[](const typename ClassType::type &op) -> std::vector<std::string> {
return op.inputs_;
})
.def("support_gpu", &ClassType::type::SupportGPU)
.def("temp_outputs",
[](const typename ClassType::type &op) -> std::vector<std::string> {
auto iter = op.attrs_.find("temporary_index");
std::vector<std::string> ret;
if (iter == op.attrs_.end()) {
return ret;
} else {
auto tmp_idx = boost::get<std::vector<int>>(iter->second);
ret.reserve(tmp_idx.size());
for (auto &index : tmp_idx) {
ret.push_back(op.outputs_.at(index));
}
return ret;
}
})
.def("__str__", &ClassType::type::DebugString);
}

@@ -202,8 +222,6 @@ All parameter, weight, gradient are variables in Paddle.
return OpRegistry::CreateOp(desc);
});

operator_base.def_static("support_gpu", &OpRegistry::SupportGPU);

operator_base.def("backward",
[](const OperatorBase &forwardOp,
const std::unordered_set<std::string> &no_grad_vars) {
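These bindings are what the new gradient_checker.py relies on: inputs() and temp_outputs() let check_grad validate its arguments and skip temporary outputs, and the per-instance support_gpu() replaces the static OpRegistry::SupportGPU lookup removed above. A sketch of how they appear from Python, using the softmax operator that the tests below construct the same way:

from paddle.v2.framework.op import Operator

op = Operator("softmax", X="X", Y="Y")
in_names = op.inputs()         # variable names bound to the op's inputs, e.g. ["X"]
out_names = op.outputs()       # variable names bound to the op's outputs, e.g. ["Y"]
tmp_names = op.temp_outputs()  # outputs listed in the "temporary_index" attribute, if any
on_gpu = op.support_gpu()      # True only if a GPU kernel is registered for this op type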
3 changes: 2 additions & 1 deletion paddle/operators/cross_entropy_op.cc
@@ -70,7 +70,8 @@ REGISTER_OP(onehot_cross_entropy, ops::OnehotCrossEntropyOp,
ops::OnehotCrossEntropyOpMaker);
REGISTER_OP_CPU_KERNEL(onehot_cross_entropy,
ops::OnehotCrossEntropyOpKernel<ops::CPUPlace, float>);

REGISTER_GRADIENT_OP(onehot_cross_entropy, onehot_cross_entropy_grad,
ops::OnehotCrossEntropyGradientOp);
REGISTER_OP_CPU_KERNEL(
onehot_cross_entropy_grad,
ops::OnehotCrossEntropyGradientOpKernel<ops::CPUPlace, float>);
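For orientation, under the usual definition of one-hot cross entropy the forward kernel computes Y[i] = -log(X[i, label[i]]) for probabilities X and integer labels, and the gradient scatters -dY[i] / X[i, label[i]] into each row's label column of dX. A numpy reference sketch of that definition (the kernel sources themselves are not part of this diff):

import numpy


def onehot_cross_entropy(X, label):
    # X: (batch, classes) probabilities; label: (batch,) integer class indices
    rows = numpy.arange(X.shape[0])
    return -numpy.log(X[rows, label])


def onehot_cross_entropy_grad(X, label, dY):
    # dY: (batch,) gradient of the per-example loss; the gradient w.r.t. X is
    # zero except in each row's label column
    dX = numpy.zeros_like(X)
    rows = numpy.arange(X.shape[0])
    dX[rows, label] = -dY / X[rows, label]
    return dX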
9 changes: 9 additions & 0 deletions paddle/operators/net_op.h
@@ -65,6 +65,15 @@ class NetOp : public framework::OperatorBase {
}
}

bool SupportGPU() const override {
for (auto& op : ops_) {
if (!op->SupportGPU()) {
return false;
}
}
return true;
}

/**
* @brief Add an operator by ptr
*/
1 change: 1 addition & 0 deletions python/paddle/v2/framework/tests/CMakeLists.txt
@@ -13,6 +13,7 @@ py_test(test_protobuf SRCS test_protobuf.py)
py_test(test_add_two_op SRCS test_add_two_op.py)
py_test(test_sigmoid_op SRCS test_sigmoid_op.py)
py_test(test_softmax_op SRCS test_softmax_op.py)
py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py)
py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py)

py_test(gradient_checker SRCS gradient_checker.py)
152 changes: 149 additions & 3 deletions python/paddle/v2/framework/tests/gradient_checker.py
@@ -1,16 +1,31 @@
import unittest

import numpy
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
import numpy
import unittest

__all__ = ['get_numeric_gradient']


def create_op(op_type):
kwargs = dict()
for in_name in Operator.get_op_input_names(op_type):
kwargs[in_name] = in_name
for out_name in Operator.get_op_output_names(op_type):
kwargs[out_name] = out_name

return Operator(op_type, **kwargs)


def grad_var_name(var_name):
return var_name + "@GRAD"


def get_numeric_gradient(op,
input_values,
output_name,
input_to_check,
delta=1e-2,
delta=0.005,
local_scope=None):
"""
Get Numeric Gradient for an operator's input.
@@ -76,6 +91,113 @@ def product(dim):
return gradient_flat.reshape(tensor_to_check.get_dims())


class GradientChecker(unittest.TestCase):
def __is_close(self, numeric_grads, scope, max_relative_error):
for name in numeric_grads:
op_grad = numpy.array(
scope.find_var(grad_var_name(name)).get_tensor())
is_close = numpy.allclose(
numeric_grads[name], op_grad, rtol=max_relative_error, atol=100)
if not is_close:
return False
return True

def check_grad(self,
forward_op,
input_vars,
inputs_to_check,
output_name,
no_grad_set=None,
only_cpu=False,
max_relative_error=0.005):
"""
        :param forward_op: the forward operator; used to create the backward op.
        :param input_vars: numpy values of the input variables. The following
            computation will use these values.
        :param inputs_to_check: names of the input variables whose gradients
            should be checked.
        :param output_name: name of the forward output with respect to which
            the gradient is computed.
        :param max_relative_error: the relative tolerance of the comparison.
        :param no_grad_set: input names to exclude when creating the backward op.
        :param only_cpu: only compute and check the gradient on the CPU kernel.
        :return: None; the test fails if the gradients do not match.
"""
if no_grad_set is None:
no_grad_set = set()

tmp_outs = forward_op.temp_outputs()
no_tmp_out = filter(lambda name: name not in tmp_outs,
forward_op.outputs())
if len(no_tmp_out) != 1:
raise ValueError("non temp out_names should be 1")

in_names = forward_op.inputs()
for no_grad in no_grad_set:
if no_grad not in in_names:
raise ValueError("no_grad should be in in_names")

backward_op = core.Operator.backward(forward_op, no_grad_set)

places = [core.CPUPlace()]
if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu():
places.append(core.GPUPlace(0))

numeric_grad = dict()
# get numeric gradient
for check_name in inputs_to_check:
numeric_grad[check_name] = \
get_numeric_gradient(forward_op, input_vars, output_name, check_name)

# get operator gradient according to different device
for place in places:
scope = core.Scope()
ctx = core.DeviceContext.create(place)

# create input var and set value
for name, value in input_vars.iteritems():
if name not in in_names:
raise ValueError(name + " not in op.inputs_")
var = scope.new_var(name).get_tensor()
var.set_dims(value.shape)
var.set(value, place)

# create output var
for out_name in forward_op.outputs():
scope.new_var(out_name).get_tensor()

            # infer the shapes of the output vars and compute/set their values
forward_op.infer_shape(scope)
forward_op.run(scope, ctx)

# create output grad var
# set shape as the output var
# set value of this grad to ones
for name in forward_op.outputs():
out_tensor = scope.find_var(name).get_tensor()
grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
grad_tensor.set_dims(out_tensor.shape())
data = 1.0 * numpy.ones(out_tensor.shape())
grad_tensor.set(data, place)

# create input grad var
for name in backward_op.outputs():
scope.new_var(name).get_tensor()

            # infer the shapes of the input gradient vars and compute/set their
            # values with the backward op
backward_op.infer_shape(scope)
backward_op.run(scope, ctx)

if isinstance(place, core.CPUPlace):
msg = "CPU kernel gradient is not close to numeric gradient"
else:
if isinstance(place, core.GPUPlace):
msg = "GPU kernel gradient is not close to numeric gradient"
else:
raise ValueError("unknown place " + type(place))
self.assertTrue(
self.__is_close(numeric_grad, scope, max_relative_error), msg)


if __name__ == '__main__':

class GetNumericGradientTest(unittest.TestCase):
@@ -87,4 +209,28 @@ def test_add_op(self):
arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')
self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2)

def test_softmax_op(self):
def stable_softmax(x):
"""Compute the softmax of vector x in a numerically stable way."""
shiftx = x - numpy.max(x)
exps = numpy.exp(shiftx)
return exps / numpy.sum(exps)

def label_softmax_grad(Y, dY):
dX = Y * 0.0
for i in range(Y.shape[0]):
d = numpy.dot(Y[i, :], dY[i, :])
dX[i, :] = Y[i, :] * (dY[i, :] - d)
return dX

softmax_op = Operator("softmax", X="X", Y="Y")

X = numpy.random.random((2, 2)).astype("float32")
Y = numpy.apply_along_axis(stable_softmax, 1, X)
dY = numpy.ones(Y.shape)
dX = label_softmax_grad(Y, dY)

arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X')
            numpy.testing.assert_almost_equal(arr, dX, decimal=2)

unittest.main()
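The body of get_numeric_gradient is largely outside this hunk, so the following is only a sketch of the standard central-difference estimate such a checker is built on: perturb one element at a time by +delta and -delta (0.005 by default above) and difference the summed output. For the add op in GetNumericGradientTest this yields a gradient of ones, matching the assertion on arr.mean().

import numpy


def numeric_gradient(f, x, delta=0.005):
    """Estimate d(sum(f(x)))/dx element-wise with central differences."""
    grad = numpy.zeros_like(x)
    flat_x = x.reshape(-1)     # views share memory with x and grad,
    flat_g = grad.reshape(-1)  # so the writes below update the originals
    for i in range(flat_x.size):
        orig = flat_x[i]
        flat_x[i] = orig + delta
        y_pos = numpy.sum(f(x))
        flat_x[i] = orig - delta
        y_neg = numpy.sum(f(x))
        flat_x[i] = orig
        flat_g[i] = (y_pos - y_neg) / (2.0 * delta)
    return grad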
7 changes: 4 additions & 3 deletions python/paddle/v2/framework/tests/op_test_util.py
@@ -1,6 +1,5 @@
import paddle.v2.framework.core as core
import unittest
import numpy
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator


@@ -24,7 +23,7 @@ def test_all(self):
scope = core.Scope()
kwargs = dict()
places = [core.CPUPlace()]
if core.is_compile_gpu() and core.Operator.support_gpu(self.type):
if core.is_compile_gpu():
places.append(core.GPUPlace(0))

for place in places:
@@ -53,6 +52,8 @@ def test_all(self):
kwargs[attr_name] = self.attrs[attr_name]

op = Operator(self.type, **kwargs)
if isinstance(place, core.GPUPlace) and not op.support_gpu():
return

op.infer_shape(scope)

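With this change an OpTestMeta-based forward test no longer needs a registry-level GPU query; the generated test_all simply skips the GPU place when the op has no GPU kernel. A sketch of the test shape that relies on this (the op type "some_op" and its tensors are hypothetical; TestCrossEntropy below follows the same pattern):

import unittest

import numpy
from op_test_util import OpTestMeta


class SomeOpForwardTest(unittest.TestCase):
    __metaclass__ = OpTestMeta  # injects test_all, which runs the op on each place

    def setUp(self):
        self.type = "some_op"  # placeholder registered op type
        self.inputs = {"X": numpy.random.random((32, 8)).astype("float32")}
        # expected outputs, computed with a numpy reference for the op
        self.outputs = {"Out": self.inputs["X"] * 2.0}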
16 changes: 14 additions & 2 deletions python/paddle/v2/framework/tests/test_cross_entropy_op.py
@@ -1,9 +1,10 @@
import unittest
import numpy
from op_test_util import OpTestMeta
from gradient_checker import GradientChecker, create_op


class TestSGD(unittest.TestCase):
class TestCrossEntropy(unittest.TestCase):
__metaclass__ = OpTestMeta

def setUp(self):
@@ -20,7 +21,18 @@ def setUp(self):
self.outputs = {'Y': numpy.array(Y).astype("float32")}


# TODO(superjom) add gradient check
class CrossEntropyGradOpTest(GradientChecker):
def test_softmax_grad(self):
op = create_op("onehot_cross_entropy")
batch_size = 100
class_num = 10
inputs = {
"X": numpy.random.uniform(
0.1, 1.0, [batch_size, class_num]).astype("float32"),
"label": (class_num / 2) * numpy.ones(batch_size).astype("int32")
}
self.check_grad(op, inputs, set("X"), "Y")


if __name__ == "__main__":
unittest.main()