From 17bfe18f5e01d5a4a3011d52cabbfbd8e5cab108 Mon Sep 17 00:00:00 2001 From: Roshani Nagmote Date: Fri, 14 Apr 2017 16:16:11 -0700 Subject: [PATCH] Fix docs for some NDArray functions (#5808) * sort,argsort,topk,argmin,argmax,argmax_channel docs modified * some sentence structure changes * revert some changes made * minor change * sentence restructering * sequencelast modified * minor word changes * formatting fixes * SequenceMask added * docs for SequenceReverse,true_divide is added * line break * minor change * take, batch_take operators added * minor formatting changes * Changes after review * note modified --- python/mxnet/ndarray.py | 68 +++++++-------- src/operator/sequence_last-inl.h | 2 +- src/operator/sequence_last.cc | 60 +++++++++++--- src/operator/sequence_mask-inl.h | 2 +- src/operator/sequence_mask.cc | 82 ++++++++++++++++--- src/operator/sequence_reverse-inl.h | 2 +- src/operator/sequence_reverse.cc | 80 +++++++++++++++--- .../tensor/broadcast_reduce_op_index.cc | 62 +++++++++++++- .../elemwise_binary_broadcast_op_basic.cc | 2 +- .../elemwise_binary_broadcast_op_extended.cc | 3 +- .../elemwise_binary_broadcast_op_logic.cc | 12 +-- src/operator/tensor/indexing_op.cc | 43 ++++++---- src/operator/tensor/indexing_op.h | 8 +- src/operator/tensor/ordering_op-inl.h | 14 ++-- src/operator/tensor/ordering_op.cc | 41 ++++++---- 15 files changed, 355 insertions(+), 126 deletions(-) diff --git a/python/mxnet/ndarray.py b/python/mxnet/ndarray.py index f1802147123c..fe618987d874 100644 --- a/python/mxnet/ndarray.py +++ b/python/mxnet/ndarray.py @@ -1210,8 +1210,8 @@ def add(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1271,8 +1271,8 @@ def subtract(lhs, rhs): .. 
note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1331,8 +1331,8 @@ def multiply(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1391,8 +1391,8 @@ def divide(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1448,8 +1448,8 @@ def power(base, exp): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1508,8 +1508,8 @@ def maximum(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1564,8 +1564,8 @@ def minimum(lhs, rhs): .. 
note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1614,7 +1614,7 @@ def minimum(lhs, rhs): # pylint: enable= no-member, protected-access def equal(lhs, rhs): - """Returns the result of element-wise **equal to**(==) comparison operation with + """Returns the result of element-wise **equal to** (==) comparison operation with broadcasting. For each element in input arrays, return 1(true) if corresponding elements are same, @@ -1624,8 +1624,8 @@ def equal(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1677,7 +1677,7 @@ def equal(lhs, rhs): # pylint: enable= no-member, protected-access def not_equal(lhs, rhs): - """Returns the result of element-wise **not equal to**(!=) comparison operation + """Returns the result of element-wise **not equal to** (!=) comparison operation with broadcasting. For each element in input arrays, return 1(true) if corresponding elements are different, @@ -1687,8 +1687,8 @@ def not_equal(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. 
Parameters ---------- @@ -1743,7 +1743,7 @@ def not_equal(lhs, rhs): # pylint: enable= no-member, protected-access def greater(lhs, rhs): - """Returns the result of element-wise **greater than**(>) comparison operation + """Returns the result of element-wise **greater than** (>) comparison operation with broadcasting. For each element in input arrays, return 1(true) if lhs elements are greater than rhs, @@ -1753,8 +1753,8 @@ def greater(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1806,7 +1806,7 @@ def greater(lhs, rhs): # pylint: enable= no-member, protected-access def greater_equal(lhs, rhs): - """Returns the result of element-wise **greater than or equal to**(>=) comparison + """Returns the result of element-wise **greater than or equal to** (>=) comparison operation with broadcasting. For each element in input arrays, return 1(true) if lhs elements are greater than equal to rhs, @@ -1816,8 +1816,8 @@ def greater_equal(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1869,7 +1869,8 @@ def greater_equal(lhs, rhs): # pylint: enable= no-member, protected-access def lesser(lhs, rhs): - """Returns the result of element-wise **lesser than**(<) comparison operation with broadcasting. + """Returns the result of element-wise **lesser than** (<) comparison operation + with broadcasting. 
For each element in input arrays, return 1(true) if lhs elements are less than rhs, otherwise return 0(false). @@ -1878,8 +1879,8 @@ def lesser(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1932,7 +1933,7 @@ def lesser(lhs, rhs): def lesser_equal(lhs, rhs): - """Returns the result of element-wise **lesser than or equal to**(<=) comparison + """Returns the result of element-wise **lesser than or equal to** (<=) comparison operation with broadcasting. For each element in input arrays, return 1(true) if lhs elements are @@ -1942,8 +1943,8 @@ def lesser_equal(lhs, rhs): .. note:: - If the corresponding dimensions of two arrays have the same size or one of them has size 1, - then the arrays are broadcastable to a common shape. + If the corresponding dimensions of two arrays have the same size or one of them has size 1, + then the arrays are broadcastable to a common shape. Parameters ---------- @@ -1995,7 +1996,8 @@ def lesser_equal(lhs, rhs): # pylint: enable= no-member, protected-access def true_divide(lhs, rhs): - """Same as ``divide``. + + """This function is similar to :meth:`divide`. 
""" return divide(lhs, rhs) diff --git a/src/operator/sequence_last-inl.h b/src/operator/sequence_last-inl.h index 849fbcef4193..c2acbf164197 100644 --- a/src/operator/sequence_last-inl.h +++ b/src/operator/sequence_last-inl.h @@ -34,7 +34,7 @@ struct SequenceLastParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(use_sequence_length) .set_default(false) .describe( - "If set to true, this layer takes in extra input sequence_length " + "If set to true, this layer takes in an extra input parameter `sequence_length` " "to specify variable length sequence"); } }; diff --git a/src/operator/sequence_last.cc b/src/operator/sequence_last.cc index a930aaaccb4a..7c796613efa8 100644 --- a/src/operator/sequence_last.cc +++ b/src/operator/sequence_last.cc @@ -30,20 +30,56 @@ Operator *SequenceLastProp::CreateOperatorEx(Context ctx, DMLC_REGISTER_PARAMETER(SequenceLastParam); MXNET_REGISTER_OP_PROPERTY(SequenceLast, SequenceLastProp) - .describe( -"Takes the last element of a sequence. Takes an n-dimensional tensor of " -"the form [max sequence length, batchsize, other dims] and returns a (n-1)-dimensional tensor " -"of the form [batchsize, other dims]. This operator takes an optional input tensor " -"sequence_length of positive ints of dimension [batchsize] when the " -"sequence_length option is set to true. This allows the operator to handle " -"variable-length sequences. If sequence_length is false, then each example " -"in the batch is assumed to have the max sequence length." -) + .describe(R"code(Takes the last element of a sequence. + +This function takes an n-dimensional input array of the form +[max_sequence_length, batch_size, other_feature_dims] and returns a (n-1)-dimensional array +of the form [batch_size, other_feature_dims]. + +Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length` should be +an input array of positive ints of dimension [batch_size]. 
To use this parameter, +set `use_sequence_length` to `True`, otherwise each example in the batch is assumed +to have the max sequence length. + +.. note:: Alternatively, you can also use `take` operator. + +Example:: + + x = [[[ 1., 2., 3.], + [ 4., 5., 6.], + [ 7., 8., 9.]], + + [[ 10., 11., 12.], + [ 13., 14., 15.], + [ 16., 17., 18.]], + + [[ 19., 20., 21.], + [ 22., 23., 24.], + [ 25., 26., 27.]]] + + // returns last sequence when sequence_length parameter is not used + SequenceLast(x) = [[ 19., 20., 21.], + [ 22., 23., 24.], + [ 25., 26., 27.]] + + // sequence_length y is used + SequenceLast(x, y=[1,1,1], use_sequence_length=True) = + [[ 1., 2., 3.], + [ 4., 5., 6.], + [ 7., 8., 9.]] + + // sequence_length y is used + SequenceLast(x, y=[1,2,3], use_sequence_length=True) = + [[ 1., 2., 3.], + [ 13., 14., 15.], + [ 25., 26., 27.]] + +)code" ADD_FILELINE) .add_argument("data", "NDArray-or-Symbol", - "n-dimensional input tensor of the form [max sequence " - "length, batchsize, other dims]") + "n-dimensional input array of the form [max_sequence_length," + " batch_size, other_feature_dims] where n>2") .add_argument("sequence_length", "NDArray-or-Symbol", - "vector of sequence lengths of size batchsize") + "vector of sequence lengths of the form [batch_size]") .add_arguments(SequenceLastParam::__FIELDS__()); } // namespace op diff --git a/src/operator/sequence_mask-inl.h b/src/operator/sequence_mask-inl.h index b6d9853f820a..69c98746553b 100644 --- a/src/operator/sequence_mask-inl.h +++ b/src/operator/sequence_mask-inl.h @@ -35,7 +35,7 @@ struct SequenceMaskParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(use_sequence_length) .set_default(false) .describe( - "If set to true, this layer takes in extra input sequence_length " + "If set to true, this layer takes in an extra input parameter `sequence_length` " "to specify variable length sequence"); DMLC_DECLARE_FIELD(value).set_default(0.).describe( "The value to be used as a mask."); diff --git 
a/src/operator/sequence_mask.cc b/src/operator/sequence_mask.cc index 47adda741c37..763bc17171ae 100644 --- a/src/operator/sequence_mask.cc +++ b/src/operator/sequence_mask.cc @@ -43,20 +43,78 @@ Operator *SequenceMaskProp::CreateOperatorEx(Context ctx, DMLC_REGISTER_PARAMETER(SequenceMaskParam); MXNET_REGISTER_OP_PROPERTY(SequenceMask, SequenceMaskProp) - .describe( -"Sets all elements outside the sequence to a constant value. Takes an n-dimensional tensor of the " -"form [max sequence length, batchsize, other dims] and returns a tensor of the same " -"shape. This operator takes an optional input tensor sequence_length of positive ints of " -"dimension [batchsize] when the sequence_length option is set to true. This allows the " -"operator to handle variable-length sequences. If sequence_length is false, then each " -"example in the batch is assumed to have the max sequence length, and this operator becomes " -"the identity operator." -) + .describe(R"code(Sets all elements outside the sequence to a constant value. + +This function takes an n-dimensional input array of the form +[max_sequence_length, batch_size, other_feature_dims] and returns an array of the same shape. + +Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length` +should be an input array of positive ints of dimension [batch_size]. +To use this parameter, set `use_sequence_length` to `True`, +otherwise each example in the batch is assumed to have the max sequence length and +this operator works as the `identity` operator. 
+ +Example:: + + x = [[[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 13., 14., 15.], + [ 16., 17., 18.]]] + + // Batch 1 + B1 = [[ 1., 2., 3.], + [ 7., 8., 9.], + [ 13., 14., 15.]] + + // Batch 2 + B2 = [[ 4., 5., 6.], + [ 10., 11., 12.], + [ 16., 17., 18.]] + + // works as identity operator when sequence_length parameter is not used + SequenceMask(x) = [[[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 13., 14., 15.], + [ 16., 17., 18.]]] + + // sequence_length [1,1] means 1 of each batch will be kept + // and other rows are masked with default mask value = 0 + SequenceMask(x, y=[1,1], use_sequence_length=True) = + [[[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 0., 0., 0.], + [ 0., 0., 0.]], + + [[ 0., 0., 0.], + [ 0., 0., 0.]]] + + // sequence_length [2,3] means 2 of batch B1 and 3 of batch B2 will be kept + // and other rows are masked with value = 1 + SequenceMask(x, y=[2,3], use_sequence_length=True, value=1) = + [[[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 1., 1., 1.], + [ 16., 17., 18.]]] + +)code" ADD_FILELINE) .add_argument("data", "NDArray-or-Symbol", - "n-dimensional input tensor of the form [max sequence " - "length, batchsize, other dims]") + "n-dimensional input array of the form [max_sequence_length," + " batch_size, other_feature_dims] where n>2") .add_argument("sequence_length", "NDArray-or-Symbol", - "vector of sequence lengths of size batchsize") + "vector of sequence lengths of the form [batch_size]") .add_arguments(SequenceMaskParam::__FIELDS__()); } // namespace op diff --git a/src/operator/sequence_reverse-inl.h b/src/operator/sequence_reverse-inl.h index d7f296653dde..048eb3e2eb78 100644 --- a/src/operator/sequence_reverse-inl.h +++ b/src/operator/sequence_reverse-inl.h @@ -34,7 +34,7 @@ struct SequenceReverseParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(use_sequence_length) .set_default(false) .describe( - "If set to true, this layer 
takes in extra input sequence_length " + "If set to true, this layer takes in an extra input parameter `sequence_length` " "to specify variable length sequence"); } }; diff --git a/src/operator/sequence_reverse.cc b/src/operator/sequence_reverse.cc index 4d229ad8e9e6..871db9b3d486 100644 --- a/src/operator/sequence_reverse.cc +++ b/src/operator/sequence_reverse.cc @@ -30,19 +30,77 @@ Operator *SequenceReverseProp::CreateOperatorEx( DMLC_REGISTER_PARAMETER(SequenceReverseParam); MXNET_REGISTER_OP_PROPERTY(SequenceReverse, SequenceReverseProp) - .describe( -"Reverses the elements of each sequence. Takes an n-dimensional tensor of the form " -"[max sequence length, batchsize, other dims] and returns a tensor of the same shape. " -"This operator takes an optional input tensor sequence_length of positive ints of dimension " -"[batchsize] when the sequence_length option is set to true. This allows the operator to " -"handle variable-length sequences. If sequence_length is false, then each example " -"in the batch is assumed to have the max sequence length." -) + .describe(R"code(Reverses the elements of each sequence. + +This function takes an n-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] +and returns an array of the same shape. + +Parameter `sequence_length` is used to handle variable-length sequences. +`sequence_length` should be an input array of positive ints of dimension [batch_size]. +To use this parameter, set `use_sequence_length` to `True`, +otherwise each example in the batch is assumed to have the max sequence length. 
+ +Example:: + + x = [[[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 13., 14., 15.], + [ 16., 17., 18.]]] + + // Batch 1 + B1 = [[ 1., 2., 3.], + [ 7., 8., 9.], + [ 13., 14., 15.]] + + // Batch 2 + B2 = [[ 4., 5., 6.], + [ 10., 11., 12.], + [ 16., 17., 18.]] + + // returns reverse sequence when sequence_length parameter is not used + SequenceReverse(x) = [[[ 13., 14., 15.], + [ 16., 17., 18.]], + + [[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 1., 2., 3.], + [ 4., 5., 6.]]] + + // sequence_length [2,2] means 2 rows of + // both batch B1 and B2 will be reversed. + SequenceReverse(x, y=[2,2], use_sequence_length=True) = + [[[ 7., 8., 9.], + [ 10., 11., 12.]], + + [[ 1., 2., 3.], + [ 4., 5., 6.]], + + [[ 13., 14., 15.], + [ 16., 17., 18.]]] + + // sequence_length [2,3] means 2 of batch B1 and 3 of batch B2 + // will be reversed. + SequenceReverse(x, y=[2,3], use_sequence_length=True) = + [[[ 7., 8., 9.], + [ 16., 17., 18.]], + + [[ 1., 2., 3.], + [ 10., 11., 12.]], + + [[ 13., 14., 15.], + [ 4., 5., 6.]]] + +)code" ADD_FILELINE) .add_argument("data", "NDArray-or-Symbol", - "n-dimensional input tensor of the form [max sequence " - "length, batchsize, other dims]") + "n-dimensional input array of the form [max_sequence_length," + " batch_size, other_feature_dims] where n>2") .add_argument("sequence_length", "NDArray-or-Symbol", - "vector of sequence lengths of size batchsize") + "vector of sequence lengths of the form [batch_size]") .add_arguments(SequenceReverseParam::__FIELDS__()); } // namespace op diff --git a/src/operator/tensor/broadcast_reduce_op_index.cc b/src/operator/tensor/broadcast_reduce_op_index.cc index 87929d12140e..dc1e33bf395a 100644 --- a/src/operator/tensor/broadcast_reduce_op_index.cc +++ b/src/operator/tensor/broadcast_reduce_op_index.cc @@ -8,17 +8,71 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_REDUCE_AXIS(argmax) -.MXNET_DESCRIBE("Returns the indices of the maximum values along an axis.")
+.describe(R"code(Returns indices of the maximum values along an axis.

 + +In the case of multiple occurrences of maximum values, the indices corresponding to the first occurrence +are returned. + +Example::

 + + x = [[ 0., 1., 2.],
 + [ 3., 4., 5.]]

 + + // argmax along axis 0 + argmax(x, axis=0) = [ 1., 1., 1.]


 + + // argmax along axis 1 + argmax(x, axis=1) = [ 2., 2.]


 + + // argmax along axis 1 keeping same dims as an input array + argmax(x, axis=1, keepdims=True) = [[ 2.],
 + [ 2.]]


 +)code" ADD_FILELINE) .set_attr("FCompute", SearchAxisCompute) .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_REDUCE_AXIS(argmin) -.MXNET_DESCRIBE("Returns the indices of the minimum values along an axis.") +.describe(R"code(Returns indices of the minimum values along an axis. + +In the case of multiple occurrences of minimum values, the indices corresponding to the first occurrence +are returned. + +Example::

 + + x = [[ 0., 1., 2.],
 + [ 3., 4., 5.]]

 + + // argmin along axis 0 + argmin(x, axis=0) = [ 0., 0., 0.]


 + + // argmin along axis 1 + argmin(x, axis=1) = [ 0., 0.]


 + + // argmin along axis 1 keeping same dims as an input array + argmin(x, axis=1, keepdims=True) = [[ 0.],
 + [ 0.]]


 + +)code" ADD_FILELINE) .set_attr("FCompute", SearchAxisCompute) .set_attr("FGradient", MakeZeroGradNodes); // Legacy support NNVM_REGISTER_OP(argmax_channel) +.describe(R"code(Returns argmax indices of each channel from the input array. + +The result will be an NDArray of shape (num_channel,). + +In case of multiple occurrences of the maximum values, the indices corresponding to the first occurrence +are returned. + +Example::

 + + x = [[ 0., 1., 2.],
 + [ 3., 4., 5.]]

 + + argmax_channel(x) = [ 2., 2.]


 + +)code" ADD_FILELINE) .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser([](NodeAttrs* attrs) { @@ -30,7 +84,7 @@ NNVM_REGISTER_OP(argmax_channel) .set_attr("FInferShape", ReduceAxisShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", SearchAxisCompute) -.add_argument("data", "NDArray-or-Symbol", "Source input"); +.add_argument("data", "NDArray-or-Symbol", "The input array"); NNVM_REGISTER_OP(pick) .set_num_inputs(2) @@ -52,7 +106,7 @@ NNVM_REGISTER_OP(pick) ret.emplace_back(nnvm::NodeEntry{p, 0, 0}); return ret; }) -.add_argument("data", "NDArray-or-Symbol", "Source input") +.add_argument("data", "NDArray-or-Symbol", "The input array") .add_argument("index", "NDArray-or-Symbol", "Index array") .add_arguments(ReduceAxisParam::__FIELDS__()); diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc b/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc index c6823831decd..0d0a1d8b5df0 100755 --- a/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc +++ b/src/operator/tensor/elemwise_binary_broadcast_op_basic.cc @@ -52,7 +52,7 @@ MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_sub) .add_alias("broadcast_minus") .describe(R"code(Returns element-wise difference of the input arrays with broadcasting. -`broadcast_minus` is an alias to the function `broadcast_sub` +`broadcast_minus` is an alias to the function `broadcast_sub`. Example:: diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc b/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc index 1bd3323fa1db..127d8c0ec1c5 100755 --- a/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc +++ b/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc @@ -10,8 +10,7 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_power) -.describe(R"code(Returns result of first array elements raised to powers from second array, element-wise - with broadcasting. 
+.describe(R"code(Returns result of first array elements raised to powers from second array, element-wise with broadcasting. Example:: diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc index 37535a270b79..900f376fe421 100644 --- a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc +++ b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc @@ -11,7 +11,7 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_equal) -.describe(R"code(Returns the result of element-wise **equal to**(==) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **equal to** (==) comparison operation with broadcasting. Example:: @@ -29,7 +29,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_not_equal) -.describe(R"code(Returns the result of element-wise **not equal to**(!=) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **not equal to** (!=) comparison operation with broadcasting. Example:: @@ -47,7 +47,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_greater) -.describe(R"code(Returns the result of element-wise **greater than**(>) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **greater than** (>) comparison operation with broadcasting. Example:: @@ -65,7 +65,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_greater_equal) -.describe(R"code(Returns the result of element-wise **greater than or equal to**(>=) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **greater than or equal to** (>=) comparison operation with broadcasting. 
Example:: @@ -83,7 +83,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_lesser) -.describe(R"code(Returns the result of element-wise **lesser than**(<) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **lesser than** (<) comparison operation with broadcasting. Example:: @@ -101,7 +101,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_lesser_equal) -.describe(R"code(Returns the result of element-wise **lesser than or equal to**(<=) comparison operation with broadcasting. +.describe(R"code(Returns the result of element-wise **lesser than or equal to** (<=) comparison operation with broadcasting. Example:: diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc index eac176b486e8..782dc0d7b3c5 100644 --- a/src/operator/tensor/indexing_op.cc +++ b/src/operator/tensor/indexing_op.cc @@ -52,28 +52,31 @@ NNVM_REGISTER_OP(_backward_Embedding) NNVM_REGISTER_OP(take) -.describe(R"code(Take elements from an array along an axis. +.describe(R"code(Takes elements from an input array along the given axis. -Slice along a particular axis with the provided indices. E.g., given an input array -with shape ``(d0, d1, d2)`` and indices with shape ``(i0, i1)``, then the output -will have shape ``(i0, i1, d1, d2)``, with:: +This function slices the input array along a particular axis with the provided indices. + +Given an input array with shape ``(d0, d1, d2)`` and indices with shape ``(i0, i1)``, the output +will have shape ``(i0, i1, d1, d2)``, computed by:: output[i,j,:,:] = input[indices[i,j],:,:] +.. note:: + - `axis`- Only slicing along axis 0 is supported for now. + - `mode`- Only `clip` mode is supported for now. 
+ Examples:: x = [[ 1., 2.], [ 3., 4.], [ 5., 6.]] - take(x, [[0,1],[1,2]]) = [[[ 1., 2.], - [ 3., 4.]], + // takes elements with specified indices along axis 0 + take(x, [[0,1],[1,2]]) = [[[ 1., 2.], + [ 3., 4.]], - [[ 3., 4.], - [ 5., 6.]]] - -.. note:: - Only slicing axis 0 is supported now. + [[ 3., 4.], + [ 5., 6.]]] )code" ADD_FILELINE) .set_num_inputs(2) @@ -95,8 +98,8 @@ Examples:: return MakeNonlossGradNode("_backward_take", n, ograds, {n->inputs[1]}, n->attrs.dict); }) -.add_argument("a", "NDArray-or-Symbol", "The source array.") -.add_argument("indices", "NDArray-or-Symbol", "The indices of the values to extract.") +.add_argument("a", "NDArray-or-Symbol", "The input array.") +.add_argument("indices", "NDArray-or-Symbol", "The indices of the values to be extracted.") .add_arguments(TakeParam::__FIELDS__()); NNVM_REGISTER_OP(_backward_take) @@ -111,10 +114,13 @@ NNVM_REGISTER_OP(_backward_take) NNVM_REGISTER_OP(batch_take) -.describe(R"code(Take elements from a data batch. +.describe(R"code(Takes elements from a data batch. + +.. note:: + `batch_take` is deprecated. Use `pick` instead. -Given an ``(d0, d1)`` input array, and ``(d0,)`` indices, the output will be a -``(d0,)`` computed by:: +Given an input array of shape ``(d0, d1)`` and indices of shape ``(d0,)``, the result will be +an output array of shape ``(d0,)`` with:: output[i] = input[i, indices[i]] @@ -124,6 +130,7 @@ Examples:: [ 3., 4.], [ 5., 6.]] + // takes elements with specified indices batch_take(x, [0,1,0]) = [ 1. 4. 5.] 
)code" ADD_FILELINE) @@ -136,8 +143,8 @@ Examples:: .set_attr("FInferShape", BatchTakeOpShape) .set_attr("FInferType", BatchTakeOpType) .set_attr("FCompute", BatchTakeOpForward) -.add_argument("a", "NDArray-or-Symbol", "Input data array") -.add_argument("indices", "NDArray-or-Symbol", "index array"); +.add_argument("a", "NDArray-or-Symbol", "The input array") +.add_argument("indices", "NDArray-or-Symbol", "The index array"); NNVM_REGISTER_OP(one_hot) .describe(R"code(Returns a one-hot array. diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h index fc1f398de18e..5cbe08ed5a34 100755 --- a/src/operator/tensor/indexing_op.h +++ b/src/operator/tensor/indexing_op.h @@ -330,13 +330,17 @@ struct TakeParam: public dmlc::Parameter { DMLC_DECLARE_FIELD(axis) .set_lower_bound(0) .set_default(0) - .describe("the axis of data tensor to be taken."); + .describe("The axis of input array to be taken."); DMLC_DECLARE_FIELD(mode) .add_enum("raise", take_::kRaise) .add_enum("wrap", take_::kWrap) .add_enum("clip", take_::kClip) .set_default(take_::kClip) - .describe("specify how out-of-bound indices bahave."); + .describe("Specify how out-of-bound indices behave." + " \"clip\" means clip to the range. So, if all indices mentioned are too large," + " they are replaced by the index that addresses the last element along an axis. " + " \"wrap\" means to wrap around. " + " \"raise\" means to raise an error. "); } }; diff --git a/src/operator/tensor/ordering_op-inl.h b/src/operator/tensor/ordering_op-inl.h index b02d607d9efe..38d47bb8e21b 100755 --- a/src/operator/tensor/ordering_op-inl.h +++ b/src/operator/tensor/ordering_op-inl.h @@ -51,13 +51,13 @@ struct TopKParam : public dmlc::Parameter { .add_enum("indices", topk_enum::kReturnIndices) .add_enum("mask", topk_enum::kReturnMask) .add_enum("both", topk_enum::kReturnBoth) - .describe("The return type."
- " \"value\" means returning the top k values," - " \"indices\" means returning the indices of the top k values," + .describe("The return type.\n" + " \"value\" means to return the top k values," + " \"indices\" means to return the indices of the top k values," " \"mask\" means to return a mask array containing 0 and 1. 1 means the top k values." - " \"both\" means to return both value and indices."); + " \"both\" means to return a list of both values and indices of top k elements."); DMLC_DECLARE_FIELD(is_ascend).set_default(false) - .describe("Whether to choose k largest or k smallest." + .describe("Whether to choose k largest or k smallest elements." " Top K largest elements will be chosen if set to false."); } }; @@ -70,7 +70,7 @@ struct SortParam : public dmlc::Parameter { .describe("Axis along which to choose sort the input tensor." " If not given, the flattened array is used. Default is -1."); DMLC_DECLARE_FIELD(is_ascend).set_default(true) - .describe("Whether sort in ascending or descending order."); + .describe("Whether to sort in ascending or descending order."); } }; @@ -82,7 +82,7 @@ struct ArgSortParam : public dmlc::Parameter { .describe("Axis along which to sort the input tensor." " If not given, the flattened array is used. Default is -1."); DMLC_DECLARE_FIELD(is_ascend).set_default(true) - .describe("Whether sort in ascending or descending order."); + .describe("Whether to sort in ascending or descending order."); } }; diff --git a/src/operator/tensor/ordering_op.cc b/src/operator/tensor/ordering_op.cc index d9ad0f6efbaa..3308836c1840 100644 --- a/src/operator/tensor/ordering_op.cc +++ b/src/operator/tensor/ordering_op.cc @@ -15,23 +15,31 @@ DMLC_REGISTER_PARAMETER(SortParam); DMLC_REGISTER_PARAMETER(ArgSortParam); NNVM_REGISTER_OP(topk) -.describe(R"code(Return the top *k* elements in an array. +.describe(R"code(Returns the top *k* elements in an input array along the given axis. 
Examples:: x = [[ 0.3, 0.2, 0.4], [ 0.1, 0.3, 0.2]] - // return the index of the largest element on last axis + // returns the index of the largest element on last axis topk(x) = [[ 2.], [ 1.]] - // return the value of the top-2 elements on last axis + // returns the value of top-2 largest elements on last axis topk(x, ret_typ='value', k=2) = [[ 0.4, 0.3], [ 0.3, 0.2]] - // flatten and then return both index and value - topk(x, ret_typ='both', k=2, axis=None) = [ 0.4, 0.3], [ 2., 0.] + // returns the value of top-2 smallest elements on last axis + topk(x, ret_typ='value', k=2, is_ascend=1) = [[ 0.2 , 0.3], + [ 0.1 , 0.2]] + + // returns the value of top-2 largest elements on axis 0 + topk(x, axis=0, ret_typ='value', k=2) = [[ 0.3, 0.3, 0.4], + [ 0.1, 0.2, 0.2]] + + // returns list of both values and indices of top 2 elements on last axis + topk(x, ret_typ='both', k=2) = [[[ 0.4, 0.3], [ 0.3, 0.2]] , [[ 2., 0.], [ 1., 2.]]] )code" ADD_FILELINE) .set_num_inputs(1) @@ -59,7 +67,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{ResourceRequest::kTempSpace}; }) -.add_argument("data", "NDArray-or-Symbol", "Source input") +.add_argument("data", "NDArray-or-Symbol", "The input array") .add_arguments(TopKParam::__FIELDS__()); NNVM_REGISTER_OP(_backward_topk) @@ -74,21 +82,21 @@ NNVM_REGISTER_OP(_backward_topk) }); NNVM_REGISTER_OP(sort) -.describe(R"code(Return a sorted copy of an array. +.describe(R"code(Returns a sorted copy of an input array along the given axis. Examples:: x = [[ 1, 4], [ 3, 1]] - // sort along the last axis + // sorts along the last axis sort(x) = [[ 1., 4.], [ 1., 3.]] - // flatten and then sort - sort(x, axis=None) = [ 1., 1., 3., 4.] + // flattens and then sorts + sort(x, axis=None) = [ 1., 1., 3., 4.]
- // sort long the first axis + // sorts along the first axis sort(x, axis=0) = [[ 1., 1.], [ 3., 4.]] @@ -122,11 +130,14 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{ResourceRequest::kTempSpace}; }) -.add_argument("data", "NDArray-or-Symbol", "Source input") +.add_argument("data", "NDArray-or-Symbol", "The input array") .add_arguments(SortParam::__FIELDS__()); NNVM_REGISTER_OP(argsort) -.describe(R"code(Returns the indices that can sort an array. +.describe(R"code(Returns the indices that would sort an input array along the given axis. + +This function performs sorting along the given axis and returns an array of indices having same shape +as an input array that index data in sorted order. Examples:: @@ -142,7 +153,7 @@ Examples:: // flatten and then sort - argsort(x, axis=None) = [ 3., 1., 5., 0., 4., 2.] + argsort(x, axis=None) = [ 3., 1., 5., 0., 4., 2.] )code" ADD_FILELINE) .set_num_inputs(1) .set_num_outputs(1) @@ -155,7 +166,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{ResourceRequest::kTempSpace}; }) -.add_argument("data", "NDArray-or-Symbol", "Source input") +.add_argument("data", "NDArray-or-Symbol", "The input array") .add_arguments(ArgSortParam::__FIELDS__()); } // namespace op } // namespace mxnet