PaddlePaddle · jacquesqiao · Aug 8, 2017 · Aug 4, 2017 · Aug 4, 2017 · Aug 4, 2017
diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
@@ -260,12 +260,6 @@ class OpRegistry {
     return CreateOp(op_desc.type(), inputs, outputs, attrs);
   }
 
-  static bool SupportGPU(const std::string& op_type) {
-    OperatorWithKernel::OpKernelKey key;
-    key.place_ = platform::GPUPlace();
-    return OperatorWithKernel::AllOpKernels().at(op_type).count(key) != 0;
-  }
-
   static std::shared_ptr<OperatorBase> CreateGradOp(const OperatorBase& op) {
     PADDLE_ENFORCE(!op.IsNetOp(),
                    "Use framework::Backward to get backward ops");

diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
@@ -88,6 +88,8 @@ class OperatorBase {
 
   virtual bool IsNetOp() const { return false; }
 
+  virtual bool SupportGPU() const { return false; }
+
   /// rename inputs outputs name
   void Rename(const std::string& old_name, const std::string& new_name);
 
@@ -308,7 +310,7 @@ class OperatorWithKernel : public OperatorBase {
   using OpKernelMap =
       std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>;
 
-  void InferShape(const Scope& scope) const {
+  void InferShape(const Scope& scope) const override {
     InferShape(InferShapeContext(this, scope));
   }
 
@@ -324,6 +326,12 @@ class OperatorWithKernel : public OperatorBase {
     return g_all_op_kernels;
   }
 
+  bool SupportGPU() const override {
+    OperatorWithKernel::OpKernelKey key;
+    key.place_ = platform::GPUPlace();
+    return OperatorWithKernel::AllOpKernels().at(type_).count(key) != 0;
+  }
+
  protected:
   virtual void InferShape(const InferShapeContext& ctx) const = 0;
 };

diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc
@@ -56,6 +56,26 @@ void ExposeOperator(ClassType &m) {
            [](const typename ClassType::type &op) -> std::vector<std::string> {
              return op.outputs_;
            })
+      .def("inputs",
+           [](const typename ClassType::type &op) -> std::vector<std::string> {
+             return op.inputs_;
+           })
+      .def("support_gpu", &ClassType::type::SupportGPU)
+      .def("temp_outputs",
+           [](const typename ClassType::type &op) -> std::vector<std::string> {
+             auto iter = op.attrs_.find("temporary_index");
+             std::vector<std::string> ret;
+             if (iter == op.attrs_.end()) {
+               return ret;
+             } else {
+               auto tmp_idx = boost::get<std::vector<int>>(iter->second);
+               ret.reserve(tmp_idx.size());
+               for (auto &index : tmp_idx) {
+                 ret.push_back(op.outputs_.at(index));
+               }
+               return ret;
+             }
+           })
       .def("__str__", &ClassType::type::DebugString);
 }
 
@@ -201,8 +221,6 @@ All parameter, weight, gradient are variables in Paddle.
     return OpRegistry::CreateOp(desc);
   });
 
-  operator_base.def_static("support_gpu", &OpRegistry::SupportGPU);
-
   operator_base.def("backward",
                     [](const OperatorBase &forwardOp,
                        const std::unordered_set<std::string> &no_grad_vars) {

diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc
@@ -70,7 +70,8 @@ REGISTER_OP(onehot_cross_entropy, ops::OnehotCrossEntropyOp,
             ops::OnehotCrossEntropyOpMaker);
 REGISTER_OP_CPU_KERNEL(onehot_cross_entropy,
                        ops::OnehotCrossEntropyOpKernel<ops::CPUPlace, float>);
-
+REGISTER_GRADIENT_OP(onehot_cross_entropy, onehot_cross_entropy_grad,
+                     ops::OnehotCrossEntropyGradientOp);
 REGISTER_OP_CPU_KERNEL(
     onehot_cross_entropy_grad,
     ops::OnehotCrossEntropyGradientOpKernel<ops::CPUPlace, float>);
diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h
@@ -65,6 +65,15 @@ class NetOp : public framework::OperatorBase {
     }
   }
 
+  bool SupportGPU() const override {
+    for (auto& op : ops_) {
+      if (!op->SupportGPU()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   /**
    * @brief Add an operator by ptr
    */

diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt
@@ -13,6 +13,7 @@ py_test(test_protobuf SRCS test_protobuf.py)
 py_test(test_add_two_op SRCS test_add_two_op.py)
 py_test(test_sigmoid_op SRCS test_sigmoid_op.py)
 py_test(test_softmax_op SRCS test_softmax_op.py)
+py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py)
 py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py)
 
 py_test(gradient_checker SRCS gradient_checker.py)

diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py
@@ -1,16 +1,31 @@
+import unittest
+
+import numpy
 import paddle.v2.framework.core as core
 from paddle.v2.framework.op import Operator
-import numpy
-import unittest
 
 __all__ = ['get_numeric_gradient']
 
 
+def create_op(op_type):
+    kwargs = dict()
+    for in_name in Operator.get_op_input_names(op_type):
+        kwargs[in_name] = in_name
+    for out_name in Operator.get_op_output_names(op_type):
+        kwargs[out_name] = out_name
+
+    return Operator(op_type, **kwargs)
+
+
+def grad_var_name(var_name):
+    return var_name + "@GRAD"
+
+
 def get_numeric_gradient(op,
                          input_values,
                          output_name,
                          input_to_check,
-                         delta=1e-2,
+                         delta=0.005,
                          local_scope=None):
     """
     Get Numeric Gradient for an operator's input.
@@ -76,6 +91,113 @@ def product(dim):
     return gradient_flat.reshape(tensor_to_check.get_dims())
 
 
+class GradientChecker(unittest.TestCase):
+    def __is_close(self, numeric_grads, scope, max_relative_error):
+        for name in numeric_grads:
+            op_grad = numpy.array(
+                scope.find_var(grad_var_name(name)).get_tensor())
+            is_close = numpy.allclose(
+                numeric_grads[name], op_grad, rtol=max_relative_error, atol=100)
+            if not is_close:
+                return False
+        return True
+
+    def check_grad(self,
+                   forward_op,
+                   input_vars,
+                   inputs_to_check,
+                   output_name,
+                   no_grad_set=None,
+                   only_cpu=False,
+                   max_relative_error=0.005):
+        """
+        :param forward_op: used to create backward_op
+        :param input_vars: numpy value of input variable. The following
+            computation will use these variables.
+        :param inputs_to_check: inputs var names that should check gradient.
+        :param output_name: output name that used to
+        :param max_relative_error: The relative tolerance parameter.
+        :param no_grad_set: used when create backward ops
+        :param only_cpu: only compute and check gradient on cpu kernel.
+        :return:
+        """
+        if no_grad_set is None:
+            no_grad_set = set()
+
+        tmp_outs = forward_op.temp_outputs()
+        no_tmp_out = filter(lambda name: name not in tmp_outs,
+                            forward_op.outputs())
+        if len(no_tmp_out) != 1:
+            raise ValueError("non temp out_names should be 1")
+
+        in_names = forward_op.inputs()
+        for no_grad in no_grad_set:
+            if no_grad not in in_names:
+                raise ValueError("no_grad should be in in_names")
+
+        backward_op = core.Operator.backward(forward_op, no_grad_set)
+
+        places = [core.CPUPlace()]
+        if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu():
+            places.append(core.GPUPlace(0))
+
+        numeric_grad = dict()
+        # get numeric gradient
+        for check_name in inputs_to_check:
+            numeric_grad[check_name] = \
+                get_numeric_gradient(forward_op, input_vars, output_name, check_name)
+
+        # get operator gradient according to different device
+        for place in places:
+            scope = core.Scope()
+            ctx = core.DeviceContext.create(place)
+
+            # create input var and set value
+            for name, value in input_vars.iteritems():
+                if name not in in_names:
+                    raise ValueError(name + " not in op.inputs_")
+                var = scope.new_var(name).get_tensor()
+                var.set_dims(value.shape)
+                var.set(value, place)
+
+            # create output var
+            for out_name in forward_op.outputs():
+                scope.new_var(out_name).get_tensor()
+
+            # infer the shape of output var and compute/set value of output var
+            forward_op.infer_shape(scope)
+            forward_op.run(scope, ctx)
+
+            # create output grad var
+            # set shape as the output var
+            # set value of this grad to ones
+            for name in forward_op.outputs():
+                out_tensor = scope.find_var(name).get_tensor()
+                grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
+                grad_tensor.set_dims(out_tensor.shape())
+                data = 1.0 * numpy.ones(out_tensor.shape())
+                grad_tensor.set(data, place)
+
+            # create input grad var
+            for name in backward_op.outputs():
+                scope.new_var(name).get_tensor()
+
+            # infer the shape of input gradient var and compute/set it's value
+            # with backward op
+            backward_op.infer_shape(scope)
+            backward_op.run(scope, ctx)
+
+            if isinstance(place, core.CPUPlace):
+                msg = "CPU kernel gradient is not close to numeric gradient"
+            else:
+                if isinstance(place, core.GPUPlace):
+                    msg = "CPU kernel gradient is not close to numeric gradient"
+                else:
+                    raise ValueError("unknown place " + type(place))
+            self.assertTrue(
+                self.__is_close(numeric_grad, scope, max_relative_error), msg)
+
+
 if __name__ == '__main__':
 
     class GetNumericGradientTest(unittest.TestCase):
@@ -87,4 +209,28 @@ def test_add_op(self):
             arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')
             self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2)
 
+        def test_softmax_op(self):
+            def stable_softmax(x):
+                """Compute the softmax of vector x in a numerically stable way."""
+                shiftx = x - numpy.max(x)
+                exps = numpy.exp(shiftx)
+                return exps / numpy.sum(exps)
+
+            def label_softmax_grad(Y, dY):
+                dX = Y * 0.0
+                for i in range(Y.shape[0]):
+                    d = numpy.dot(Y[i, :], dY[i, :])
+                    dX[i, :] = Y[i, :] * (dY[i, :] - d)
+                return dX
+
+            softmax_op = Operator("softmax", X="X", Y="Y")
+
+            X = numpy.random.random((2, 2)).astype("float32")
+            Y = numpy.apply_along_axis(stable_softmax, 1, X)
+            dY = numpy.ones(Y.shape)
+            dX = label_softmax_grad(Y, dY)
+
+            arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X')
+            numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2)
+
     unittest.main()
diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py
@@ -1,6 +1,5 @@
-import paddle.v2.framework.core as core
-import unittest
 import numpy
+import paddle.v2.framework.core as core
 from paddle.v2.framework.op import Operator
 
 
@@ -24,7 +23,7 @@ def test_all(self):
             scope = core.Scope()
             kwargs = dict()
             places = [core.CPUPlace()]
-            if core.is_compile_gpu() and core.Operator.support_gpu(self.type):
+            if core.is_compile_gpu():
                 places.append(core.GPUPlace(0))
 
             for place in places:
@@ -53,6 +52,8 @@ def test_all(self):
                         kwargs[attr_name] = self.attrs[attr_name]
 
                 op = Operator(self.type, **kwargs)
+                if isinstance(place, core.GPUPlace) and not op.support_gpu():
+                    return
 
                 op.infer_shape(scope)
 

diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
@@ -1,9 +1,10 @@
 import unittest
 import numpy
 from op_test_util import OpTestMeta
+from gradient_checker import GradientChecker, create_op
 
 
-class TestSGD(unittest.TestCase):
+class TestCrossEntropy(unittest.TestCase):
     __metaclass__ = OpTestMeta
 
     def setUp(self):
@@ -20,7 +21,18 @@ def setUp(self):
         self.outputs = {'Y': numpy.array(Y).astype("float32")}
 
 
-# TODO(superjom) add gradient check
+class CrossEntropyGradOpTest(GradientChecker):
+    def test_softmax_grad(self):
+        op = create_op("onehot_cross_entropy")
+        batch_size = 100
+        class_num = 10
+        inputs = {
+            "X": numpy.random.uniform(
+                0.1, 1.0, [batch_size, class_num]).astype("float32"),
+            "label": (class_num / 2) * numpy.ones(batch_size).astype("int32")
+        }
+        self.check_grad(op, inputs, set("X"), "Y")
+
 
 if __name__ == "__main__":
     unittest.main()