From 17bfe18f5e01d5a4a3011d52cabbfbd8e5cab108 Mon Sep 17 00:00:00 2001 From: Roshani Nagmote Date: Fri, 14 Apr 2017 16:16:11 -0700 Subject: [PATCH] Fix docs for some NDArray functions (#5808) * sort,argsort,topk,argmin,argmax,argmax_channel docs modified * some sentence structure changes * revert some changes made * minor change * sentence restructering * sequencelast modified * minor word changes * formatting fixes * SequenceMask added * docs for SequenceReverse,true_divide is added * line break * minor change * take, batch_take operators added * minor formatting changes * Changes after review * note modified --- python/mxnet/ndarray.py | 68 +++++++-------- src/operator/sequence_last-inl.h | 2 +- src/operator/sequence_last.cc | 60 +++++++++++--- src/operator/sequence_mask-inl.h | 2 +- src/operator/sequence_mask.cc | 82 ++++++++++++++++--- src/operator/sequence_reverse-inl.h | 2 +- src/operator/sequence_reverse.cc | 80 +++++++++++++++--- .../tensor/broadcast_reduce_op_index.cc | 62 +++++++++++++- .../elemwise_binary_broadcast_op_basic.cc | 2 +- .../elemwise_binary_broadcast_op_extended.cc | 3 +- .../elemwise_binary_broadcast_op_logic.cc | 12 +-- src/operator/tensor/indexing_op.cc | 43 ++++++---- src/operator/tensor/indexing_op.h | 8 +- src/operator/tensor/ordering_op-inl.h | 14 ++-- src/operator/tensor/ordering_op.cc | 41 ++++++---- 15 files changed, 355 insertions(+), 126 deletions(-) diff --git a/python/mxnet/ndarray.py b/python/mxnet/ndarray.py index f1802147123c..fe618987d874 100644 --- a/python/mxnet/ndarray.py +++ b/python/mxnet/ndarray.py @@ -1210,8 +1210,8 @@ def add(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1271,8 +1271,8 @@ def subtract(lhs, rhs): .. 
note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1331,8 +1331,8 @@ def multiply(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1391,8 +1391,8 @@ def divide(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1448,8 +1448,8 @@ def power(base, exp): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1508,8 +1508,8 @@ def maximum(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1564,8 +1564,8 @@ def minimum(lhs, rhs): .. 
note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1614,7 +1614,7 @@ def minimum(lhs, rhs): # pylint: enable= no-member, protected-access def equal(lhs, rhs): - """Returns the result of element-wise **equal to**(==) comparison operation with + """Returns the result of element-wise **equal to** (==) comparison operation with broadcasting. For each element in input arrays, return 1(true) if corresponding elements are same, @@ -1624,8 +1624,8 @@ def equal(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1677,7 +1677,7 @@ def equal(lhs, rhs): # pylint: enable= no-member, protected-access def not_equal(lhs, rhs): - """Returns the result of element-wise **not equal to**(!=) comparison operation + """Returns the result of element-wise **not equal to** (!=) comparison operation with broadcasting. For each element in input arrays, return 1(true) if corresponding elements are different, @@ -1687,8 +1687,8 @@ def not_equal(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. 
Parameters ---------- @@ -1743,7 +1743,7 @@ def not_equal(lhs, rhs): # pylint: enable= no-member, protected-access def greater(lhs, rhs): - """Returns the result of element-wise **greater than**(>) comparison operation + """Returns the result of element-wise **greater than** (>) comparison operation with broadcasting. For each element in input arrays, return 1(true) if lhs elements are greater than rhs, @@ -1753,8 +1753,8 @@ def greater(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1806,7 +1806,7 @@ def greater(lhs, rhs): # pylint: enable= no-member, protected-access def greater_equal(lhs, rhs): - """Returns the result of element-wise **greater than or equal to**(>=) comparison + """Returns the result of element-wise **greater than or equal to** (>=) comparison operation with broadcasting. For each element in input arrays, return 1(true) if lhs elements are greater than equal to rhs, @@ -1816,8 +1816,8 @@ def greater_equal(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1869,7 +1869,8 @@ def greater_equal(lhs, rhs): # pylint: enable= no-member, protected-access def lesser(lhs, rhs): - """Returns the result of element-wise **lesser than**(<) comparison operation with broadcasting. + """Returns the result of element-wise **lesser than** (<) comparison operation + with broadcasting. 
For each element in input arrays, return 1(true) if lhs elements are less than rhs, otherwise return 0(false). @@ -1878,8 +1879,8 @@ def lesser(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1932,7 +1933,7 @@ def lesser(lhs, rhs): def lesser_equal(lhs, rhs): - """Returns the result of element-wise **lesser than or equal to**(<=) comparison + """Returns the result of element-wise **lesser than or equal to** (<=) comparison operation with broadcasting. For each element in input arrays, return 1(true) if lhs elements are @@ -1942,8 +1943,8 @@ def lesser_equal(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1995,7 +1996,8 @@ def lesser_equal(lhs, rhs): # pylint: enable= no-member, protected-access def true_divide(lhs, rhs): - """Same as ``divide``. + + """This function is similar to :meth:`divide`. 
""" return divide(lhs, rhs) diff --git a/src/operator/sequence_last-inl.h b/src/operator/sequence_last-inl.h index 849fbcef4193..c2acbf164197 100644 --- a/src/operator/sequence_last-inl.h +++ b/src/operator/sequence_last-inl.h @@ -34,7 +34,7 @@ struct SequenceLastParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(use_sequence_length) .set_default(false) .describe( - "If set to true, this layer takes in extra input sequence_length " + "If set to true, this layer takes in an extra input parameter `sequence_length` " "to specify variable length sequence"); } }; diff --git a/src/operator/sequence_last.cc b/src/operator/sequence_last.cc index a930aaaccb4a..7c796613efa8 100644 --- a/src/operator/sequence_last.cc +++ b/src/operator/sequence_last.cc @@ -30,20 +30,56 @@ Operator *SequenceLastProp::CreateOperatorEx(Context ctx, DMLC_REGISTER_PARAMETER(SequenceLastParam); MXNET_REGISTER_OP_PROPERTY(SequenceLast, SequenceLastProp) - .describe( -"Takes the last element of a sequence. Takes an n-dimensional tensor of " -"the form [max sequence length, batchsize, other dims] and returns a (n-1)-dimensional tensor " -"of the form [batchsize, other dims]. This operator takes an optional input tensor " -"sequence_length of positive ints of dimension [batchsize] when the " -"sequence_length option is set to true. This allows the operator to handle " -"variable-length sequences. If sequence_length is false, then each example " -"in the batch is assumed to have the max sequence length." -) + .describe(R"code(Takes the last element of a sequence. + +This function takes an n-dimensional input array of the form +[max_sequence_length, batch_size, other_feature_dims] and returns a (n-1)-dimensional array +of the form [batch_size, other_feature_dims]. + +Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length` should be +an input array of positive ints of dimension [batch_size]. 
To use this parameter, +set `use_sequence_length` to `True`, otherwise each example in the batch is assumed +to have the max sequence length. + +.. note:: Alternatively, you can also use `take` operator. + +Example:: + + x = [[[ 1., 2., 3.], + [ 4., 5., 6.], + [ 7., 8., 9.]], + + [[ 10., 11., 12.], + [ 13., 14., 15.], + [ 16., 17., 18.]], + + [[ 19., 20., 21.], + [ 22., 23., 24.], + [ 25., 26., 27.]]] + + // returns last sequence when sequence_length parameter is not used + SequenceLast(x) = [[ 19., 20., 21.], + [ 22., 23., 24.], + [ 25., 26., 27.]] + + // sequence_length y is used + SequenceLast(x, y=[1,1,1], use_sequence_length=True) = + [[ 1., 2., 3.], + [ 4., 5., 6.], + [ 7., 8., 9.]] + + // sequence_length y is used + SequenceLast(x, y=[1,2,3], use_sequence_length=True) = + [[ 1., 2., 3.], + [ 13., 14., 15.], + [ 25., 26., 27.]] + +)code" ADD_FILELINE) .add_argument("data", "NDArray-or-Symbol", - "n-dimensional input tensor of the form [max sequence " - "length, batchsize, other dims]") + "n-dimensional input array of the form [max_sequence_length," + " batch_size, other_feature_dims] where n>2") .add_argument("sequence_length", "NDArray-or-Symbol", - "vector of sequence lengths of size batchsize") + "vector of sequence lengths of the form [batch_size]") .add_arguments(SequenceLastParam::__FIELDS__()); } // namespace op diff --git a/src/operator/sequence_mask-inl.h b/src/operator/sequence_mask-inl.h index b6d9853f820a..69c98746553b 100644 --- a/src/operator/sequence_mask-inl.h +++ b/src/operator/sequence_mask-inl.h @@ -35,7 +35,7 @@ struct SequenceMaskParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(use_sequence_length) .set_default(false) .describe( - "If set to true, this layer takes in extra input sequence_length " + "If set to true, this layer takes in an extra input parameter `sequence_length` " "to specify variable length sequence"); DMLC_DECLARE_FIELD(value).set_default(0.).describe( "The value to be used as a mask."); diff --git 
a/src/operator/sequence_mask.cc b/src/operator/sequence_mask.cc index 47adda741c37..763bc17171ae 100644 --- a/src/operator/sequence_mask.cc +++ b/src/operator/sequence_mask.cc @@ -43,20 +43,78 @@ Operator *SequenceMaskProp::CreateOperatorEx(Context ctx, DMLC_REGISTER_PARAMETER(SequenceMaskParam); MXNET_REGISTER_OP_PROPERTY(SequenceMask, SequenceMaskProp) - .describe( -"Sets all elements outside the sequence to a constant value. Takes an n-dimensional tensor of the " -"form [max sequence length, batchsize, other dims] and returns a tensor of the same " -"shape. This operator takes an optional input tensor sequence_length of positive ints of " -"dimension [batchsize] when the sequence_length option is set to true. This allows the " -"operator to handle variable-length sequences. If sequence_length is false, then each " -"example in the batch is assumed to have the max sequence length, and this operator becomes " -"the identity operator." -) + .describe(R"code(Sets all elements outside the sequence to a constant value. + +This function takes an n-dimensional input array of the form +[max_sequence_length, batch_size, other_feature_dims] and returns an array of the same shape. + +Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length` +should be an input array of positive ints of dimension [batch_size]. +To use this parameter, set `use_sequence_length` to `True`, +otherwise each example in the batch is assumed to have the max sequence length and +this operator works as the `identity` operator. 
+ +Example:: + + x = [[[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 13., 14., 15.], + [ 16., 17., 18.]]] + + // Batch 1 + B1 = [[ 1., 2., 3.], + [ 7., 8., 9.], + [ 13., 14., 15.]] + + // Batch 2 + B2 = [[ 4., 5., 6.], + [ 10., 11., 12.], + [ 16., 17., 18.]] + + // works as identity operator when sequence_length parameter is not used + SequenceMask(x) = [[[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 13., 14., 15.], + [ 16., 17., 18.]]] + + // sequence_length [1,1] means 1 of each batch will be kept + // and other rows are masked with default mask value = 0 + SequenceMask(x, y=[1,1], use_sequence_length=True) = + [[[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 0., 0., 0.], + [ 0., 0., 0.]], + + [[ 0., 0., 0.], + [ 0., 0., 0.]]] + + // sequence_length [2,3] means 2 of batch B1 and 3 of batch B2 will be kept + // and other rows are masked with value = 1 + SequenceMask(x, y=[2,3], use_sequence_length=True, value=1) = + [[[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 1., 1., 1.], + [ 16., 17., 18.]]] + +)code" ADD_FILELINE) .add_argument("data", "NDArray-or-Symbol", - "n-dimensional input tensor of the form [max sequence " - "length, batchsize, other dims]") + "n-dimensional input array of the form [max_sequence_length," + " batch_size, other_feature_dims] where n>2") .add_argument("sequence_length", "NDArray-or-Symbol", - "vector of sequence lengths of size batchsize") + "vector of sequence lengths of the form [batch_size]") .add_arguments(SequenceMaskParam::__FIELDS__()); } // namespace op diff --git a/src/operator/sequence_reverse-inl.h b/src/operator/sequence_reverse-inl.h index d7f296653dde..048eb3e2eb78 100644 --- a/src/operator/sequence_reverse-inl.h +++ b/src/operator/sequence_reverse-inl.h @@ -34,7 +34,7 @@ struct SequenceReverseParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(use_sequence_length) .set_default(false) .describe( - "If set to true, this layer 
takes in extra input sequence_length " + "If set to true, this layer takes in an extra input parameter `sequence_length` " "to specify variable length sequence"); } }; diff --git a/src/operator/sequence_reverse.cc b/src/operator/sequence_reverse.cc index 4d229ad8e9e6..871db9b3d486 100644 --- a/src/operator/sequence_reverse.cc +++ b/src/operator/sequence_reverse.cc @@ -30,19 +30,77 @@ Operator *SequenceReverseProp::CreateOperatorEx( DMLC_REGISTER_PARAMETER(SequenceReverseParam); MXNET_REGISTER_OP_PROPERTY(SequenceReverse, SequenceReverseProp) - .describe( -"Reverses the elements of each sequence. Takes an n-dimensional tensor of the form " -"[max sequence length, batchsize, other dims] and returns a tensor of the same shape. " -"This operator takes an optional input tensor sequence_length of positive ints of dimension " -"[batchsize] when the sequence_length option is set to true. This allows the operator to " -"handle variable-length sequences. If sequence_length is false, then each example " -"in the batch is assumed to have the max sequence length." -) + .describe(R"code(Reverses the elements of each sequence. + +This function takes an n-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] +and returns an array of the same shape. + +Parameter `sequence_length` is used to handle variable-length sequences. +`sequence_length` should be an input array of positive ints of dimension [batch_size]. +To use this parameter, set `use_sequence_length` to `True`, +otherwise each example in the batch is assumed to have the max sequence length. 
+ +Example:: + + x = [[[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 13., 14., 15.], + [ 16., 17., 18.]]] + + // Batch 1 + B1 = [[ 1., 2., 3.], + [ 7., 8., 9.], + [ 13., 14., 15.]] + + // Batch 2 + B2 = [[ 4., 5., 6.], + [ 10., 11., 12.], + [ 16., 17., 18.]] + + // returns reverse sequence when sequence_length parameter is not used + SequenceReverse(x) = [[[ 13., 14., 15.], + [ 16., 17., 18.]], + + [[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 1., 2., 3.], + [ 4., 5., 6.]]] + + // sequence_length [2,2] means 2 rows of + // both batch B1 and B2 will be reversed. + SequenceReverse(x, y=[2,2], use_sequence_length=True) = + [[[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 13., 14., 15.], + [ 16., 17., 18.]]] + + // sequence_length [2,3] means 2 of batch B1 and 3 of batch B2 + // will be reversed. + SequenceReverse(x, y=[2,3], use_sequence_length=True) = + [[[ 7., 8., 9.], + [ 16., 17., 18.]], + + [[ 1., 2., 3.], + [ 10., 11., 12.]], + + [[ 13., 14., 15.], + [ 4., 5., 6.]]] + +)code" ADD_FILELINE) .add_argument("data", "NDArray-or-Symbol", - "n-dimensional input tensor of the form [max sequence " - "length, batchsize, other dims]") + "n-dimensional input array of the form [max_sequence_length," + " batch_size, other_feature_dims] where n>2") .add_argument("sequence_length", "NDArray-or-Symbol", - "vector of sequence lengths of size batchsize") + "vector of sequence lengths of the form [batch_size]") .add_arguments(SequenceReverseParam::__FIELDS__()); } // namespace op diff --git a/src/operator/tensor/broadcast_reduce_op_index.cc b/src/operator/tensor/broadcast_reduce_op_index.cc index 87929d12140e..dc1e33bf395a 100644 --- a/src/operator/tensor/broadcast_reduce_op_index.cc +++ b/src/operator/tensor/broadcast_reduce_op_index.cc @@ -8,17 +8,71 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_REDUCE_AXIS(argmax) -.MXNET_DESCRIBE("Returns the indices of the maximum values along an axis.")
+.describe(R"code(Returns indices of the maximum values along an axis.

 + +In the case of multiple occurrences of maximum values, the indices corresponding to the first occurrence +are returned. + +Example::

 + + x = [[ 0., 1., 2.],
 + [ 3., 4., 5.]]

 + + // argmax along axis 0 + argmax(x, axis=0) = [ 1., 1., 1.]


 + + // argmax along axis 1 + argmax(x, axis=1) = [ 2., 2.]


 + + // argmax along axis 1 keeping same dims as an input array + argmax(x, axis=1, keepdims=True) = [[ 2.],
 + [ 2.]]


 +)code" ADD_FILELINE) .set_attr("FCompute", SearchAxisCompute) .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_REDUCE_AXIS(argmin) -.MXNET_DESCRIBE("Returns the indices of the minimum values along an axis.") +.describe(R"code(Returns indices of the minimum values along an axis. + +In the case of multiple occurrences of minimum values, the indices corresponding to the first occurrence +are returned. + +Example::

 + + x = [[ 0., 1., 2.],
 + [ 3., 4., 5.]]

 + + // argmin along axis 0 + argmin(x, axis=0) = [ 0., 0., 0.]


 + + // argmin along axis 1 + argmin(x, axis=1) = [ 0., 0.]


 + + // argmin along axis 1 keeping same dims as an input array + argmin(x, axis=1, keepdims=True) = [[ 0.],
 + [ 0.]]


 + +)code" ADD_FILELINE) .set_attr("FCompute", SearchAxisCompute) .set_attr("FGradient", MakeZeroGradNodes); // Legacy support NNVM_REGISTER_OP(argmax_channel) +.describe(R"code(Returns argmax indices of each channel from the input array. + +The result will be an NDArray of shape (num_channel,). + +In case of multiple occurrences of the maximum values, the indices corresponding to the first occurrence +are returned. + +Example::

 + + x = [[ 0., 1., 2.],
 + [ 3., 4., 5.]]

 + + argmax_channel(x) = [ 2., 2.]


 + +)code" ADD_FILELINE) .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser([](NodeAttrs* attrs) { @@ -30,7 +84,7 @@ NNVM_REGISTER_OP(argmax_channel) .set_attr("FInferShape", ReduceAxisShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", SearchAxisCompute) -.add_argument("data", "NDArray-or-Symbol", "Source input"); +.add_argument("data", "NDArray-or-Symbol", "The input array"); NNVM_REGISTER_OP(pick) .set_num_inputs(2) @@ -52,7 +106,7 @@ NNVM_REGISTER_OP(pick) ret.emplace_back(nnvm::NodeEntry{p, 0, 0}); return ret; }) -.add_argument("data", "NDArray-or-Symbol", "Source input") +.add_argument("data", "NDArray-or-Symbol", "The input array") .add_argument("index", "NDArray-or-Symbol", "Index array") .add_arguments(ReduceAxisParam::__FIELDS__()); diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc b/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc index c6823831decd..0d0a1d8b5df0 100755 --- a/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc +++ b/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc @@ -52,7 +52,7 @@ MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_sub) .add_alias("broadcast_minus") .describe(R"code(Returns element-wise difference of the input arrays with broadcasting. -`broadcast_minus` is an alias to the function `broadcast_sub` +`broadcast_minus` is an alias to the function `broadcast_sub`. Example:: diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc b/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc index 1bd3323fa1db..127d8c0ec1c5 100755 --- a/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc +++ b/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc @@ -10,8 +10,7 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_power) -.describe(R"code(Returns result of first array elements raised to powers from second array, element-wise - with broadcasting. 
+.describe(R"code(Returns result of first array elements raised to powers from second array, element-wise with broadcasting. Example:: diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc index 37535a270b79..900f376fe421 100644 --- a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc +++ b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc @@ -11,7 +11,7 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_equal) -.describe(R"code(Returns the result of element-wise **equal to**(==) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **equal to** (==) comparison operation with broadcasting. Example:: @@ -29,7 +29,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_not_equal) -.describe(R"code(Returns the result of element-wise **not equal to**(!=) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **not equal to** (!=) comparison operation with broadcasting. Example:: @@ -47,7 +47,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_greater) -.describe(R"code(Returns the result of element-wise **greater than**(>) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **greater than** (>) comparison operation with broadcasting. Example:: @@ -65,7 +65,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_greater_equal) -.describe(R"code(Returns the result of element-wise **greater than or equal to**(>=) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **greater than or equal to** (>=) comparison operation with broadcasting. 
Example:: @@ -83,7 +83,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_lesser) -.describe(R"code(Returns the result of element-wise **lesser than**(<) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **lesser than** (<) comparison operation with broadcasting. Example:: @@ -101,7 +101,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_lesser_equal) -.describe(R"code(Returns the result of element-wise **lesser than or equal to**(<=) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **lesser than or equal to** (<=) comparison operation with broadcasting. Example:: diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc index eac176b486e8..782dc0d7b3c5 100644 --- a/src/operator/tensor/indexing_op.cc +++ b/src/operator/tensor/indexing_op.cc @@ -52,28 +52,31 @@ NNVM_REGISTER_OP(_backward_Embedding) NNVM_REGISTER_OP(take) -.describe(R"code(Take elements from an array along an axis. +.describe(R"code(Takes elements from an input array along the given axis. -Slice along a particular axis with the provided indices. E.g., given an input array -with shape ``(d0, d1, d2)`` and indices with shape ``(i0, i1)``, then the output -will have shape ``(i0, i1, d1, d2)``, with:: +This function slices the input array along a particular axis with the provided indices. + +Given an input array with shape ``(d0, d1, d2)`` and indices with shape ``(i0, i1)``, the output +will have shape ``(i0, i1, d1, d2)``, computed by:: output[i,j,:,:] = input[indices[i,j],:,:] +.. note:: + - `axis`- Only slicing along axis 0 is supported for now. + - `mode`- Only `clip` mode is supported for now. 
+ Examples:: x = [[ 1., 2.], [ 3., 4.], [ 5., 6.]] - take(x, [[0,1],[1,2]]) = [[[ 1., 2.], - [ 3., 4.]], + // takes elements with specified indices along axis 0 + take(x, [[0,1],[1,2]]) = [[[ 1., 2.], + [ 3., 4.]], - [[ 3., 4.], - [ 5., 6.]]] - -.. note:: - Only slicing axis 0 is supported now. + [[ 3., 4.], + [ 5., 6.]]] )code" ADD_FILELINE) .set_num_inputs(2) @@ -95,8 +98,8 @@ Examples:: return MakeNonlossGradNode("_backward_take", n, ograds, {n->inputs[1]}, n->attrs.dict); }) -.add_argument("a", "NDArray-or-Symbol", "The source array.") -.add_argument("indices", "NDArray-or-Symbol", "The indices of the values to extract.") +.add_argument("a", "NDArray-or-Symbol", "The input array.") +.add_argument("indices", "NDArray-or-Symbol", "The indices of the values to be extracted.") .add_arguments(TakeParam::__FIELDS__()); NNVM_REGISTER_OP(_backward_take) @@ -111,10 +114,13 @@ NNVM_REGISTER_OP(_backward_take) NNVM_REGISTER_OP(batch_take) -.describe(R"code(Take elements from a data batch. +.describe(R"code(Takes elements from a data batch. + +.. note:: + `batch_take` is deprecated. Use `pick` instead. -Given an ``(d0, d1)`` input array, and ``(d0,)`` indices, the output will be a -``(d0,)`` computed by:: +Given an input array of shape ``(d0, d1)`` and indices of shape ``(d0,)``, the result will be +an output array of shape ``(d0,)`` with:: output[i] = input[i, indices[i]] @@ -124,6 +130,7 @@ Examples:: [ 3., 4.], [ 5., 6.]] + // takes elements with specified indices batch_take(x, [0,1,0]) = [ 1. 4. 5.] 
)code" ADD_FILELINE) @@ -136,8 +143,8 @@ Examples:: .set_attr("FInferShape", BatchTakeOpShape) .set_attr("FInferType", BatchTakeOpType) .set_attr("FCompute", BatchTakeOpForward) -.add_argument("a", "NDArray-or-Symbol", "Input data array") -.add_argument("indices", "NDArray-or-Symbol", "index array"); +.add_argument("a", "NDArray-or-Symbol", "The input array") +.add_argument("indices", "NDArray-or-Symbol", "The index array"); NNVM_REGISTER_OP(one_hot) .describe(R"code(Returns a one-hot array. diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h index fc1f398de18e..5cbe08ed5a34 100755 --- a/src/operator/tensor/indexing_op.h +++ b/src/operator/tensor/indexing_op.h @@ -330,13 +330,17 @@ struct TakeParam: public dmlc::Parameter { DMLC_DECLARE_FIELD(axis) .set_lower_bound(0) .set_default(0) - .describe("the axis of data tensor to be taken."); + .describe("The axis of input array to be taken."); DMLC_DECLARE_FIELD(mode) .add_enum("raise", take_::kRaise) .add_enum("wrap", take_::kWrap) .add_enum("clip", take_::kClip) .set_default(take_::kClip) - .describe("specify how out-of-bound indices bahave."); + .describe("Specify how out-of-bound indices behave." + " \"clip\" means clip to the range. So, if all indices mentioned are too large," + " they are replaced by the index that addresses the last element along an axis. " + " \"wrap\" means to wrap around. " + " \"raise\" means to raise an error. "); } }; diff --git a/src/operator/tensor/ordering_op-inl.h b/src/operator/tensor/ordering_op-inl.h index b02d607d9efe..38d47bb8e21b 100755 --- a/src/operator/tensor/ordering_op-inl.h +++ b/src/operator/tensor/ordering_op-inl.h @@ -51,13 +51,13 @@ struct TopKParam : public dmlc::Parameter { .add_enum("indices", topk_enum::kReturnIndices) .add_enum("mask", topk_enum::kReturnMask) .add_enum("both", topk_enum::kReturnBoth) - .describe("The return type."
- " \"value\" means returning the top k values," - " \"indices\" means returning the indices of the top k values," + .describe("The return type.\n" + " \"value\" means to return the top k values," + " \"indices\" means to return the indices of the top k values," " \"mask\" means to return a mask array containing 0 and 1. 1 means the top k values." - " \"both\" means to return both value and indices."); + " \"both\" means to return a list of both values and indices of top k elements."); DMLC_DECLARE_FIELD(is_ascend).set_default(false) - .describe("Whether to choose k largest or k smallest." + .describe("Whether to choose k largest or k smallest elements." " Top K largest elements will be chosen if set to false."); } }; @@ -70,7 +70,7 @@ struct SortParam : public dmlc::Parameter { .describe("Axis along which to choose sort the input tensor." " If not given, the flattened array is used. Default is -1."); DMLC_DECLARE_FIELD(is_ascend).set_default(true) - .describe("Whether sort in ascending or descending order."); + .describe("Whether to sort in ascending or descending order."); } }; @@ -82,7 +82,7 @@ struct ArgSortParam : public dmlc::Parameter { .describe("Axis along which to sort the input tensor." " If not given, the flattened array is used. Default is -1."); DMLC_DECLARE_FIELD(is_ascend).set_default(true) - .describe("Whether sort in ascending or descending order."); + .describe("Whether to sort in ascending or descending order."); } }; diff --git a/src/operator/tensor/ordering_op.cc b/src/operator/tensor/ordering_op.cc index d9ad0f6efbaa..3308836c1840 100644 --- a/src/operator/tensor/ordering_op.cc +++ b/src/operator/tensor/ordering_op.cc @@ -15,23 +15,31 @@ DMLC_REGISTER_PARAMETER(SortParam); DMLC_REGISTER_PARAMETER(ArgSortParam); NNVM_REGISTER_OP(topk) -.describe(R"code(Return the top *k* elements in an array. +.describe(R"code(Returns the top *k* elements in an input array along the given axis. 
Examples:: x = [[ 0.3, 0.2, 0.4], [ 0.1, 0.3, 0.2]] - // return the index of the largest element on last axis + // returns the index of the largest element on last axis topk(x) = [[ 2.], [ 1.]] - // return the value of the top-2 elements on last axis + // returns the value of top-2 largest elements on last axis topk(x, ret_typ='value', k=2) = [[ 0.4, 0.3], [ 0.3, 0.2]] - // flatten and then return both index and value - topk(x, ret_typ='both', k=2, axis=None) = [ 0.4, 0.3], [ 2., 0.] + // returns the value of top-2 smallest elements on last axis + topk(x, ret_typ='value', k=2, is_ascend=1) = [[ 0.2 , 0.3], + [ 0.1 , 0.2]] + + // returns the value of top-2 largest elements on axis 0 + topk(x, axis=0, ret_typ='value', k=2) = [[ 0.3, 0.3, 0.4], + [ 0.1, 0.2, 0.2]] + + // returns list of both values and indices of top 2 elements on last axis + topk(x, ret_typ='both', k=2) = [[[ 0.4, 0.3], [ 0.3, 0.2]] , [[ 2., 0.], [ 1., 2.]]] )code" ADD_FILELINE) .set_num_inputs(1) @@ -59,7 +67,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{ResourceRequest::kTempSpace}; }) -.add_argument("data", "NDArray-or-Symbol", "Source input") +.add_argument("data", "NDArray-or-Symbol", "The input array") .add_arguments(TopKParam::__FIELDS__()); NNVM_REGISTER_OP(_backward_topk) @@ -74,21 +82,21 @@ NNVM_REGISTER_OP(_backward_topk) }); NNVM_REGISTER_OP(sort) -.describe(R"code(Return a sorted copy of an array. +.describe(R"code(Returns a sorted copy of an input array along the given axis. Examples:: x = [[ 1, 4], [ 3, 1]] - // sort along the last axis + // sorts along the last axis sort(x) = [[ 1., 4.], [ 1., 3.]] - // flatten and then sort - sort(x, axis=None) = [ 1., 1., 3., 4.] + // flattens and then sorts + sort(x, axis=None) = [ 1., 1., 3., 4.]
- // sort long the first axis + // sorts along the first axis sort(x, axis=0) = [[ 1., 1.], [ 3., 4.]] @@ -122,11 +130,14 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{ResourceRequest::kTempSpace}; }) -.add_argument("data", "NDArray-or-Symbol", "Source input") +.add_argument("data", "NDArray-or-Symbol", "The input array") .add_arguments(SortParam::__FIELDS__()); NNVM_REGISTER_OP(argsort) -.describe(R"code(Returns the indices that can sort an array. +.describe(R"code(Returns the indices that would sort an input array along the given axis. + +This function performs sorting along the given axis and returns an array of indices having same shape +as an input array that index data in sorted order. Examples:: @@ -142,7 +153,7 @@ Examples:: // flatten and then sort - argsort(x, axis=None) = [ 3., 1., 5., 0., 4., 2.] + argsort(x, axis=None) = [ 3., 1., 5., 0., 4., 2.] )code" ADD_FILELINE) .set_num_inputs(1) .set_num_outputs(1) @@ -155,7 +166,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{ResourceRequest::kTempSpace}; }) -.add_argument("data", "NDArray-or-Symbol", "Source input") +.add_argument("data", "NDArray-or-Symbol", "The input array") .add_arguments(ArgSortParam::__FIELDS__()); } // namespace op } // namespace mxnet