Commit

fix for bug apache#10868: _backward_softsign activation is incorrect (apache#11827)

* fix for bug apache#10868: _backward_softsign activation is incorrect
The problem was that the softsign gradient was computed from the outputs instead of the inputs.
Added the forward inputs to the list of backward inputs and changed what gets passed into the softsign gradient calculation.
Added a softsign case to the test_activation function.
Code reviewed by anirudh.

* fix for bug apache#10868: _backward_softsign activation is incorrect
The problem was that the softsign gradient was computed from the outputs instead of the inputs.
Added the forward inputs to the list of backward inputs and changed what gets passed into the softsign gradient calculation.
Added a softsign case to the test_activation function.
Code reviewed by anirudh.
(Amended to change tabs to spaces.)

* rerunning the CI build

* fixed size checks for when USE_MKLDNN=ON
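
For context: softsign is y = x / (1 + |x|) and its gradient with respect to the input is 1 / (1 + |x|)^2, so the formula has to be evaluated at the forward input x; evaluating it at the forward output y (the behaviour this commit removes) yields a different, incorrect value. A minimal NumPy sketch of the difference (illustrative only, not code from the repository):

import numpy as np

def softsign(x):
    return x / (1.0 + np.abs(x))

def softsign_grad(v):
    # gradient formula 1 / (1 + |v|)^2; only correct when v is the forward input x
    return 1.0 / np.square(1.0 + np.abs(v))

x = np.array([-2.0, -0.5, 0.5, 2.0])
y = softsign(x)
eps = 1e-6
finite_diff = (softsign(x + eps) - softsign(x - eps)) / (2 * eps)

print(np.allclose(softsign_grad(x), finite_diff))   # True:  gradient from the input
print(np.allclose(softsign_grad(y), finite_diff))   # False: gradient from the output (the bug)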
samskalicky authored and anirudh2290 committed Jul 20, 2018
1 parent 3ac7091 commit 206dea4
Showing 4 changed files with 31 additions and 11 deletions.
7 changes: 4 additions & 3 deletions src/operator/nn/activation-inl.h
@@ -174,7 +174,7 @@ void ActivationGradComputeImpl(const nnvm::NodeAttrs& attrs, const OpContext &ct
       break;
     case activation::kSoftSign:
       ActivationBackward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad>(
-          ctx, inputs[0], inputs[1], req[0], outputs[0]);
+          ctx, inputs[0], inputs[2], req[0], outputs[0]);
       break;
     default:
       LOG(FATAL) << "unknown activation type";
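
Here inputs[0] is the incoming output gradient and, per the ActivationGrad change further down, inputs[2] is the forward input, which is what mshadow_op::softsign_grad actually needs. A rough NumPy analogue of this ActivationBackward call (a sketch under that assumption, not the real kernel):

import numpy as np

def softsign_backward(out_grad, x):
    # dL/dx = dL/dy * 1 / (1 + |x|)^2, evaluated at the forward input x (inputs[2])
    return out_grad * (1.0 / np.square(1.0 + np.abs(x)))

# e.g. softsign_backward(np.ones(3), np.array([-2.0, 0.0, 2.0])) -> [0.111..., 1.0, 0.111...]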
@@ -198,12 +198,13 @@ void ActivationGradCompute(const nnvm::NodeAttrs& attrs,
                               const std::vector<TBlob>& inputs,
                               const std::vector<OpReqType>& req,
                               const std::vector<TBlob>& outputs) {
-#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
+#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
   bool relu = param.act_type == activation::kReLU;
   CHECK_EQ(inputs.size(), relu ? 2U : 3U);
 #else
-  CHECK_EQ(inputs.size(), 2U);
+  bool softsign = param.act_type == activation::kSoftSign;
+  CHECK_EQ(inputs.size(), softsign ? 3U : 2U);
 #endif
   CHECK_EQ(outputs.size(), 1U);
   CHECK_EQ(req.size(), 1U);
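
The reworked size check matches how the gradient node is wired (see the ActivationGrad hunk below): the backward pass always receives the output gradient and the forward output, and softsign additionally receives the forward input. A small sketch of that expectation (hypothetical helper, for illustration only):

def expected_backward_inputs(act_type, use_cudnn_or_mkldnn=False):
    # [out_grad, output] always; the forward input is appended for softsign,
    # and on CUDNN/MKLDNN builds for every type except relu
    if use_cudnn_or_mkldnn:
        return 2 if act_type == 'relu' else 3
    return 3 if act_type == 'softsign' else 2

assert expected_backward_inputs('softsign') == 3
assert expected_backward_inputs('sigmoid') == 2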
29 changes: 22 additions & 7 deletions src/operator/nn/activation.cc
@@ -44,11 +44,19 @@ struct ActivationGrad {
                           const std::vector<nnvm::NodeEntry>& ograds) const {
     std::vector<nnvm::NodeEntry> heads(ograds.begin(), ograds.end());
     heads.emplace_back(nnvm::NodeEntry{n, activation::kOut, 0});
-#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
+
     const NodeAttrs& attrs = n->attrs;
+    int act_type = dmlc::get<ActivationParam>(attrs.parsed).act_type;
+    if (act_type == activation::kSoftSign) {
+      // for softsign need the inputs to compute the activation.
+      heads.push_back(n->inputs[activation::kData]);
+    }
+
+#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
     // for ReLU, no need to pass input data. This enables inplace optimization during the
     // forward pass.
-    if (dmlc::get<ActivationParam>(attrs.parsed).act_type != activation::kReLU) {
+    if (act_type != activation::kReLU &&
+        act_type != activation::kSoftSign) {
       heads.push_back(n->inputs[activation::kData]);
     }
 #endif
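
The heads vector assembled here becomes, in order, the inputs of the backward node, which is why the compute functions above and below can read the forward data at inputs[2] for softsign. A plain-Python sketch of that ordering (names are illustrative, not from the codebase):

def backward_heads(act_type, use_cudnn_or_mkldnn=False):
    heads = ['ograd', 'out']              # output gradient, then the forward output (kOut)
    if act_type == 'softsign':
        heads.append('data')              # softsign needs the forward input (kData)
    elif use_cudnn_or_mkldnn and act_type != 'relu':
        heads.append('data')              # these backends also take the input for non-ReLU types
    return heads

assert backward_heads('softsign') == ['ograd', 'out', 'data']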
@@ -118,8 +126,8 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
                                           std::vector<int> *in_attrs,
                                           std::vector<int> *out_attrs) {
   bool ret = false;
-#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
+#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
   if (param.act_type != activation::kReLU) {
     CHECK_EQ(in_attrs->size(), 3U);
     ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
@@ -133,10 +141,17 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
                                              in_attrs, out_attrs);
   }
 #else
-  CHECK_EQ(in_attrs->size(), 2U);
-  ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
-                                                       dispatch_mode,
-                                                       in_attrs, out_attrs);
+  if (param.act_type == activation::kSoftSign) {
+    CHECK_EQ(in_attrs->size(), 3U);
+    ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
+                                                         dispatch_mode,
+                                                         in_attrs, out_attrs);
+  } else {
+    CHECK_EQ(in_attrs->size(), 2U);
+    ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
+                                                         dispatch_mode,
+                                                         in_attrs, out_attrs);
+  }
 #endif
   CHECK_EQ(out_attrs->size(), 1U);
 #if MXNET_USE_MKLDNN == 1
2 changes: 1 addition & 1 deletion src/operator/nn/activation.cu
@@ -87,7 +87,7 @@ void ActivationGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
         ctx, inputs[0], inputs[1], req[0], outputs[0]);
   } else if (param.act_type == activation::kSoftSign) {
     ActivationBackward<gpu, mshadow_op::softsign, mshadow_op::softsign_grad>(
-        ctx, inputs[0], inputs[1], req[0], outputs[0]);
+        ctx, inputs[0], inputs[2], req[0], outputs[0]);
   } else {
     MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
       // XXX: for y = relu(x), y is passed as "in_data" to Backward()
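
The CUDA path receives the same [out_grad, output, input] layout, so the fix is the identical index change. With a CUDA-enabled build, the Python check shown after the test diff below can be run on the GPU simply by creating the array there, for example (hypothetical snippet, using the same imports, and assuming mx.gpu(0) is available):

x = mx.nd.array(np.random.uniform(-3.0, 3.0, size=(10,)), ctx=mx.gpu(0))
# then proceed exactly as in the CPU check below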
4 changes: 4 additions & 0 deletions tests/python/unittest/test_operator.py
@@ -6852,6 +6852,10 @@ def test_activation():
                      lambda x: np.log(1. + np.exp(x)),
                      lambda x: 1. - 1 / (1 + np.exp(x)),
                      -3.0, 3.0],
+        'softsign': [lambda x: mx.sym.Activation(x, act_type='softsign'),
+                     lambda x: x / (1. + np.abs(x)),
+                     lambda x: 1. / np.square(1. + np.abs(x)),
+                     -3.0, 3.0],
     }
     # Loop over operators
     for name, op in unary_ops.items():
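
The new dictionary entry gives test_activation the symbol constructor, the NumPy forward reference x / (1 + |x|), the analytic gradient 1 / (1 + |x|)^2, and the input range to sample from. An end-to-end way to see the fixed gradient from Python, assuming an MXNet build that contains this commit (a sketch, not part of the test suite):

import mxnet as mx
import numpy as np

x = mx.nd.array(np.random.uniform(-3.0, 3.0, size=(10,)))
x.attach_grad()
with mx.autograd.record():
    y = mx.nd.Activation(x, act_type='softsign')
y.backward()

expected = 1.0 / np.square(1.0 + np.abs(x.asnumpy()))
assert np.allclose(x.grad.asnumpy(), expected, atol=1e-5)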
