Fix docs for some NDArray functions (apache#5808)
* sort, argsort, topk, argmin, argmax, argmax_channel docs modified

* some sentence structure changes

* revert some changes made

* minor change

* sentence restructuring

* sequencelast modified

* minor word changes

* formatting fixes

* SequenceMask added

* docs for SequenceReverse, true_divide are added

* line break

* minor change

* take, batch_take operators added

* minor formatting changes

* Changes after review

* note modified
Roshrini authored and mli committed Apr 14, 2017
1 parent 7033cae commit 17bfe18
Showing 15 changed files with 355 additions and 126 deletions.
68 changes: 35 additions & 33 deletions python/mxnet/ndarray.py
@@ -1210,8 +1210,8 @@ def add(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1271,8 +1271,8 @@ def subtract(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1331,8 +1331,8 @@ def multiply(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1391,8 +1391,8 @@ def divide(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1448,8 +1448,8 @@ def power(base, exp):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1508,8 +1508,8 @@ def maximum(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1564,8 +1564,8 @@ def minimum(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1614,7 +1614,7 @@ def minimum(lhs, rhs):
# pylint: enable= no-member, protected-access

def equal(lhs, rhs):
"""Returns the result of element-wise **equal to**(==) comparison operation with
"""Returns the result of element-wise **equal to** (==) comparison operation with
broadcasting.
For each element in input arrays, return 1(true) if corresponding elements are same,
@@ -1624,8 +1624,8 @@ def equal(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1677,7 +1677,7 @@ def equal(lhs, rhs):
# pylint: enable= no-member, protected-access

def not_equal(lhs, rhs):
"""Returns the result of element-wise **not equal to**(!=) comparison operation
"""Returns the result of element-wise **not equal to** (!=) comparison operation
with broadcasting.
For each element in input arrays, return 1(true) if corresponding elements are different,
@@ -1687,8 +1687,8 @@ def not_equal(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1743,7 +1743,7 @@ def not_equal(lhs, rhs):
# pylint: enable= no-member, protected-access

def greater(lhs, rhs):
"""Returns the result of element-wise **greater than**(>) comparison operation
"""Returns the result of element-wise **greater than** (>) comparison operation
with broadcasting.
For each element in input arrays, return 1(true) if lhs elements are greater than rhs,
@@ -1753,8 +1753,8 @@ def greater(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1806,7 +1806,7 @@ def greater(lhs, rhs):
# pylint: enable= no-member, protected-access

def greater_equal(lhs, rhs):
"""Returns the result of element-wise **greater than or equal to**(>=) comparison
"""Returns the result of element-wise **greater than or equal to** (>=) comparison
operation with broadcasting.
For each element in input arrays, return 1(true) if lhs elements are greater than equal to rhs,
@@ -1816,8 +1816,8 @@ def greater_equal(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1869,7 +1869,8 @@ def greater_equal(lhs, rhs):
# pylint: enable= no-member, protected-access

def lesser(lhs, rhs):
"""Returns the result of element-wise **lesser than**(<) comparison operation with broadcasting.
"""Returns the result of element-wise **lesser than** (<) comparison operation
with broadcasting.
For each element in input arrays, return 1(true) if lhs elements are less than rhs,
otherwise return 0(false).
@@ -1878,8 +1879,8 @@ def lesser(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1932,7 +1933,7 @@ def lesser(lhs, rhs):


def lesser_equal(lhs, rhs):
"""Returns the result of element-wise **lesser than or equal to**(<=) comparison
"""Returns the result of element-wise **lesser than or equal to** (<=) comparison
operation with broadcasting.
For each element in input arrays, return 1(true) if lhs elements are
@@ -1942,8 +1943,8 @@ def lesser_equal(lhs, rhs):
.. note::
If the corresponding dimensions of two arrays have the same size or one of them has size 1,
then the arrays are broadcastable to a common shape.
Parameters
----------
@@ -1995,7 +1996,8 @@ def lesser_equal(lhs, rhs):
# pylint: enable= no-member, protected-access

def true_divide(lhs, rhs):
"""Same as ``divide``.

"""This function is similar to :meth:`divide`.
"""
return divide(lhs, rhs)
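
The broadcasting rule repeated in the notes above is easiest to see in a short session. The following is a minimal illustration, not part of this change; it assumes an MXNet build where the functions edited above are exposed under `mx.nd`, and the array values are invented for the example:

import mxnet as mx

# A (2, 3) array and a (1, 3) array: the size-1 dimension is broadcast,
# so the operands share the common shape (2, 3).
x = mx.nd.array([[1., 2., 3.],
                 [4., 5., 6.]])
y = mx.nd.array([[10., 20., 30.]])

print(mx.nd.add(x, y).asnumpy())         # [[11. 22. 33.], [14. 25. 36.]]

# The comparison operators follow the same rule and return 1/0 element-wise.
print(mx.nd.equal(x, y / 10).asnumpy())  # [[1. 1. 1.], [0. 0. 0.]]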

2 changes: 1 addition & 1 deletion src/operator/sequence_last-inl.h
@@ -34,7 +34,7 @@ struct SequenceLastParam : public dmlc::Parameter<SequenceLastParam> {
DMLC_DECLARE_FIELD(use_sequence_length)
.set_default(false)
.describe(
"If set to true, this layer takes in extra input sequence_length "
"If set to true, this layer takes in an extra input parameter `sequence_length` "
"to specify variable length sequence");
}
};
60 changes: 48 additions & 12 deletions src/operator/sequence_last.cc
@@ -30,20 +30,56 @@ Operator *SequenceLastProp::CreateOperatorEx(Context ctx,
DMLC_REGISTER_PARAMETER(SequenceLastParam);

MXNET_REGISTER_OP_PROPERTY(SequenceLast, SequenceLastProp)
- .describe(
- "Takes the last element of a sequence. Takes an n-dimensional tensor of "
- "the form [max sequence length, batchsize, other dims] and returns a (n-1)-dimensional tensor "
- "of the form [batchsize, other dims]. This operator takes an optional input tensor "
- "sequence_length of positive ints of dimension [batchsize] when the "
- "sequence_length option is set to true. This allows the operator to handle "
- "variable-length sequences. If sequence_length is false, then each example "
- "in the batch is assumed to have the max sequence length."
- )
.describe(R"code(Takes the last element of a sequence.
This function takes an n-dimensional input array of the form
[max_sequence_length, batch_size, other_feature_dims] and returns a (n-1)-dimensional array
of the form [batch_size, other_feature_dims].
Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length` should be
an input array of positive ints of dimension [batch_size]. To use this parameter,
set `use_sequence_length` to `True`, otherwise each example in the batch is assumed
to have the max sequence length.
.. note:: Alternatively, you can also use `take` operator.
Example::
x = [[[ 1., 2., 3.],
[ 4., 5., 6.],
[ 7., 8., 9.]],
[[ 10., 11., 12.],
[ 13., 14., 15.],
[ 16., 17., 18.]],
[[ 19., 20., 21.],
[ 22., 23., 24.],
[ 25., 26., 27.]]]
// returns last sequence when sequence_length parameter is not used
SequenceLast(x) = [[ 19., 20., 21.],
[ 22., 23., 24.],
[ 25., 26., 27.]]
// sequence_length y is used
SequenceLast(x, y=[1,1,1], use_sequence_length=True) =
[[ 1., 2., 3.],
[ 4., 5., 6.],
[ 7., 8., 9.]]
// sequence_length y is used
SequenceLast(x, y=[1,2,3], use_sequence_length=True) =
[[ 1., 2., 3.],
[ 13., 14., 15.],
[ 25., 26., 27.]]
)code" ADD_FILELINE)
.add_argument("data", "NDArray-or-Symbol",
"n-dimensional input tensor of the form [max sequence "
"length, batchsize, other dims]")
"n-dimensional input array of the form [max_sequence_length,"
" batch_size, other_feature_dims] where n>2")
.add_argument("sequence_length", "NDArray-or-Symbol",
"vector of sequence lengths of size batchsize")
"vector of sequence lengths of the form [batch_size]")
.add_arguments(SequenceLastParam::__FIELDS__());

} // namespace op
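
As an aside (not part of the diff), the new SequenceLast docstring example above translates into a short Python session. This is a hedged sketch that assumes the registered operator is reachable from the Python frontend as `mx.nd.SequenceLast`:

import mxnet as mx

# Same values as the docstring example:
# shape (3, 3, 3) = [max_sequence_length, batch_size, other_feature_dims]
x = mx.nd.arange(1, 28).reshape((3, 3, 3))

# Without sequence_length, each example contributes its last time step.
print(mx.nd.SequenceLast(x).asnumpy())

# With variable lengths, example i contributes time step sequence_length[i] - 1,
# giving [[1, 2, 3], [13, 14, 15], [25, 26, 27]] for lengths [1, 2, 3].
seq_len = mx.nd.array([1, 2, 3])
print(mx.nd.SequenceLast(x, seq_len, use_sequence_length=True).asnumpy())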
2 changes: 1 addition & 1 deletion src/operator/sequence_mask-inl.h
@@ -35,7 +35,7 @@ struct SequenceMaskParam : public dmlc::Parameter<SequenceMaskParam> {
DMLC_DECLARE_FIELD(use_sequence_length)
.set_default(false)
.describe(
"If set to true, this layer takes in extra input sequence_length "
"If set to true, this layer takes in an extra input parameter `sequence_length` "
"to specify variable length sequence");
DMLC_DECLARE_FIELD(value).set_default(0.).describe(
"The value to be used as a mask.");
82 changes: 70 additions & 12 deletions src/operator/sequence_mask.cc
@@ -43,20 +43,78 @@ Operator *SequenceMaskProp::CreateOperatorEx(Context ctx,
DMLC_REGISTER_PARAMETER(SequenceMaskParam);

MXNET_REGISTER_OP_PROPERTY(SequenceMask, SequenceMaskProp)
- .describe(
- "Sets all elements outside the sequence to a constant value. Takes an n-dimensional tensor of the "
- "form [max sequence length, batchsize, other dims] and returns a tensor of the same "
- "shape. This operator takes an optional input tensor sequence_length of positive ints of "
- "dimension [batchsize] when the sequence_length option is set to true. This allows the "
- "operator to handle variable-length sequences. If sequence_length is false, then each "
- "example in the batch is assumed to have the max sequence length, and this operator becomes "
- "the identity operator."
- )
.describe(R"code(Sets all elements outside the sequence to a constant value.
This function takes an n-dimensional input array of the form
[max_sequence_length, batch_size, other_feature_dims] and returns an array of the same shape.
Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length`
should be an input array of positive ints of dimension [batch_size].
To use this parameter, set `use_sequence_length` to `True`,
otherwise each example in the batch is assumed to have the max sequence length and
this operator works as the `identity` operator.
Example::
x = [[[ 1., 2., 3.],
[ 4., 5., 6.]],
[[ 7., 8., 9.],
[ 10., 11., 12.]],
[[ 13., 14., 15.],
[ 16., 17., 18.]]]
// Batch 1
B1 = [[ 1., 2., 3.],
[ 7., 8., 9.],
[ 13., 14., 15.]]
// Batch 2
B2 = [[ 4., 5., 6.],
[ 10., 11., 12.],
[ 16., 17., 18.]]
// works as identity operator when sequence_length parameter is not used
SequenceMask(x) = [[[ 1., 2., 3.],
[ 4., 5., 6.]],
[[ 7., 8., 9.],
[ 10., 11., 12.]],
[[ 13., 14., 15.],
[ 16., 17., 18.]]]
// sequence_length [1,1] means 1 of each batch will be kept
// and other rows are masked with default mask value = 0
SequenceMask(x, y=[1,1], use_sequence_length=True) =
[[[ 1., 2., 3.],
[ 4., 5., 6.]],
[[ 0., 0., 0.],
[ 0., 0., 0.]],
[[ 0., 0., 0.],
[ 0., 0., 0.]]]
// sequence_length [2,3] means 2 of batch B1 and 3 of batch B2 will be kept
// and other rows are masked with value = 1
SequenceMask(x, y=[2,3], use_sequence_length=True, value=1) =
[[[ 1., 2., 3.],
[ 4., 5., 6.]],
[[ 7., 8., 9.],
[ 10., 11., 12.]],
[[ 1., 1., 1.],
[ 16., 17., 18.]]]
)code" ADD_FILELINE)
.add_argument("data", "NDArray-or-Symbol",
"n-dimensional input tensor of the form [max sequence "
"length, batchsize, other dims]")
"n-dimensional input array of the form [max_sequence_length,"
" batch_size, other_feature_dims] where n>2")
.add_argument("sequence_length", "NDArray-or-Symbol",
"vector of sequence lengths of size batchsize")
"vector of sequence lengths of the form [batch_size]")
.add_arguments(SequenceMaskParam::__FIELDS__());

} // namespace op
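
Likewise for SequenceMask, a hedged sketch of the docstring example above (illustration only; assumes the operator is exposed as `mx.nd.SequenceMask`):

import mxnet as mx

# shape (3, 2, 3) = [max_sequence_length, batch_size, other_feature_dims]
x = mx.nd.arange(1, 19).reshape((3, 2, 3))

# Keep 2 time steps of example 0 and all 3 of example 1; positions beyond
# each length are overwritten with `value` (0 by default, 1 here).
seq_len = mx.nd.array([2, 3])
out = mx.nd.SequenceMask(x, seq_len, use_sequence_length=True, value=1)
print(out.asnumpy())   # the last time step of example 0 becomes [1, 1, 1]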
2 changes: 1 addition & 1 deletion src/operator/sequence_reverse-inl.h
@@ -34,7 +34,7 @@ struct SequenceReverseParam : public dmlc::Parameter<SequenceReverseParam> {
DMLC_DECLARE_FIELD(use_sequence_length)
.set_default(false)
.describe(
"If set to true, this layer takes in extra input sequence_length "
"If set to true, this layer takes in an extra input parameter `sequence_length` "
"to specify variable length sequence");
}
};
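
The description updated above belongs to SequenceReverse, which uses the same `sequence_length`/`use_sequence_length` convention as SequenceLast and SequenceMask. A hedged usage sketch (illustration only; assumes the operator is exposed as `mx.nd.SequenceReverse`):

import mxnet as mx

# shape (3, 2, 2) = [max_sequence_length, batch_size, other_feature_dims]
x = mx.nd.arange(1, 13).reshape((3, 2, 2))

# Reverse only the first sequence_length[i] time steps of example i;
# example 0 has its steps 0 and 1 swapped, example 1 is fully reversed.
seq_len = mx.nd.array([2, 3])
out = mx.nd.SequenceReverse(x, seq_len, use_sequence_length=True)
print(out.asnumpy())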