From 81fcff86aa30fb188a66aea2bd682dc1ad08ec16 Mon Sep 17 00:00:00 2001
From: Cjkkkk <656569648@qq.com>
Date: Tue, 12 Nov 2019 16:13:44 +0800
Subject: [PATCH 1/3] Api refactor (#1728)

api refactor for compression, especially, quantization APIs
---
 docs/en_US/Compressor/Overview.md             |  52 +++++++-
 .../pynni/nni/compression/torch/compressor.py | 112 +++++++++++++++---
 src/sdk/pynni/tests/test_compressor.py        |   9 +-
 3 files changed, 147 insertions(+), 26 deletions(-)

diff --git a/docs/en_US/Compressor/Overview.md b/docs/en_US/Compressor/Overview.md
index 5fc8e45c5d..b078d748a6 100644
--- a/docs/en_US/Compressor/Overview.md
+++ b/docs/en_US/Compressor/Overview.md
@@ -180,12 +180,54 @@ class YourQuantizer(nni.compression.tensorflow.Quantizer):
 
     def quantize_weight(self, weight, config, **kwargs):
         """
-        weight is the target weight tensor
-        config is the selected dict object in config_list for this layer
-        kwargs contains op, op_types, and op_name
-        design your quantizer and return new weight
+        A quantizer should overload this method to quantize weight tensors.
+        This method is effectively hooked to :meth:`forward` of the model.
+
+        Parameters
+        ----------
+        weight : Tensor
+            weight that needs to be quantized
+        config : dict
+            the configuration for weight quantization
         """
+
+        # Put your code to generate `new_weight` here
+
         return new_weight
+    
+    def quantize_output(self, output, config, **kwargs):
+        """
+        A quantizer should overload this method to quantize output.
+        This method is effectively hooked to :meth:`forward` of the model.
+
+        Parameters
+        ----------
+        output : Tensor
+            output that needs to be quantized
+        config : dict
+            the configuration for output quantization
+        """
+
+        # Put your code to generate `new_output` here
+
+        return new_output
+
+    def quantize_input(self, *inputs, config, **kwargs):
+        """
+        A quantizer should overload this method to quantize input.
+        This method is effectively hooked to :meth:`forward` of the model.
+
+        Parameters
+        ----------
+        inputs : Tensor
+            inputs that need to be quantized
+        config : dict
+            the configuration for inputs quantization
+        """
+
+        # Put your code to generate `new_input` here
+
+        return new_input
 
     # note for pytorch version, there is no sess in input arguments
     def update_epoch(self, epoch_num, sess):
@@ -200,8 +242,6 @@ class YourQuantizer(nni.compression.tensorflow.Quantizer):
         pass
 ```
 
-__[TODO]__ Will add another member function `quantize_layer_output`, as some quantization algorithms also quantize layers' output.
-
 ### Usage of user customized compression algorithm
 
 __[TODO]__ ...
diff --git a/src/sdk/pynni/nni/compression/torch/compressor.py b/src/sdk/pynni/nni/compression/torch/compressor.py
index 65d2e90f13..bb06524fba 100644
--- a/src/sdk/pynni/nni/compression/torch/compressor.py
+++ b/src/sdk/pynni/nni/compression/torch/compressor.py
@@ -32,7 +32,23 @@ def __init__(self, model, config_list):
         """
         self.bound_model = model
         self.config_list = config_list
-        self.modules_to_compress = []
+        self.modules_to_compress = None
+
+    def detect_modules_to_compress(self):
+        """
+        Detect all modules that should be compressed, and save the result in `self.modules_to_compress`.
+
+        The model will be instrumented and user should never edit it after calling this method.
+        """
+        if self.modules_to_compress is None:
+            self.modules_to_compress = []
+            for name, module in self.bound_model.named_modules():
+                layer = LayerInfo(name, module)
+                config = self.select_config(layer)
+                if config is not None:
+                    self.modules_to_compress.append((layer, config))
+        return self.modules_to_compress
+
 
     def compress(self):
         """
@@ -41,12 +57,9 @@ def compress(self):
         The model will be instrumented and user should never edit it after calling this method.
         `self.modules_to_compress` records all the to-be-compressed layers
         """
-        for name, module in self.bound_model.named_modules():
-            layer = LayerInfo(name, module)
-            config = self.select_config(layer)
-            if config is not None:
-                self._instrument_layer(layer, config)
-                self.modules_to_compress.append((layer, config))
+        modules_to_compress = self.detect_modules_to_compress()
+        for layer, config in modules_to_compress:
+            self._instrument_layer(layer, config)
         return self.bound_model
 
     def get_modules_to_compress(self):
@@ -55,7 +68,7 @@ def get_modules_to_compress(self):
 
         Returns
         -------
-        self.modules_to_compress : list
+        list
             a list of the layers, each of which is a tuple (`layer`, `config`),
             `layer` is `LayerInfo`, `config` is a `dict`
         """
@@ -72,7 +85,7 @@ def select_config(self, layer):
 
         Returns
         -------
-        ret : config or None
+        config or None
             the retrieved configuration for this layer, if None, this layer should
             not be compressed
         """
@@ -240,26 +253,87 @@ class Quantizer(Compressor):
     """
 
     def quantize_weight(self, weight, config, op, op_type, op_name):
-        """user should know where dequantize goes and implement it in quantize method
-        we now do not provide dequantize method
+        """
+        A quantizer should overload this method to quantize weight.
+        This method is effectively hooked to :meth:`forward` of the model.
+
+        Parameters
+        ----------
+        weight : Tensor
+            weight that needs to be quantized
+        config : dict
+            the configuration for weight quantization
         """
         raise NotImplementedError("Quantizer must overload quantize_weight()")
 
+    def quantize_output(self, output, config, op, op_type, op_name):
+        """
+        A quantizer should overload this method to quantize output.
+        This method is effectively hooked to :meth:`forward` of the model.
+
+        Parameters
+        ----------
+        output : Tensor
+            output that needs to be quantized
+        config : dict
+            the configuration for output quantization
+        """
+        raise NotImplementedError("Quantizer must overload quantize_output()")
+
+    def quantize_input(self, *inputs, config, op, op_type, op_name):
+        """
+        A quantizer should overload this method to quantize input.
+        This method is effectively hooked to :meth:`forward` of the model.
+
+        Parameters
+        ----------
+        inputs : Tensor
+            inputs that need to be quantized
+        config : dict
+            the configuration for inputs quantization
+        """
+        raise NotImplementedError("Quantizer must overload quantize_input()")
+
+
     def _instrument_layer(self, layer, config):
+        """
+        Create a wrapper forward function to replace the original one.
+
+        Parameters
+        ----------
+        layer : LayerInfo
+            the layer whose forward function is wrapped for quantization
+        config : dict
+            the configuration for quantization
+        """
         assert layer._forward is None, 'Each model can only be compressed once'
-        if not _check_weight(layer.module):
-            _logger.warning('Module %s does not have parameter "weight"', layer.name)
-            return
+        assert "quant_types" in config, 'must provide quant_types in config'
+        assert isinstance(config["quant_types"], list), 'quant_types must be list type'
+
+        if 'weight' in config["quant_types"]:
+            if not _check_weight(layer.module):
+                _logger.warning('Module %s does not have parameter "weight"', layer.name)
         layer._forward = layer.module.forward
 
         def new_forward(*inputs):
-            weight = layer.module.weight.data
-            new_weight = self.quantize_weight(weight, config, op=layer.module, op_type=layer.type, op_name=layer.name)
-            layer.module.weight.data = new_weight
-            return layer._forward(*inputs)
+            if 'input' in config["quant_types"]:
+                inputs = self.quantize_input(inputs, config=config, op=layer.module, op_type=layer.type, op_name=layer.name)
+
+            if 'weight' in config["quant_types"] and _check_weight(layer.module):
+                weight = layer.module.weight.data
+                new_weight = self.quantize_weight(weight, config, op=layer.module, op_type=layer.type, op_name=layer.name)
+                layer.module.weight.data = new_weight
+                result = layer._forward(*inputs)
+                layer.module.weight.data = weight
+            else:
+                result = layer._forward(*inputs)
 
-        layer.module.forward = new_forward
+            if 'output' in config["quant_types"]:
+                result = self.quantize_output(result, config, op=layer.module, op_type=layer.type, op_name=layer.name)
 
+            return result
+
+        layer.module.forward = new_forward
 
 def _check_weight(module):
     try:
diff --git a/src/sdk/pynni/tests/test_compressor.py b/src/sdk/pynni/tests/test_compressor.py
index ca8b628640..e4eb0bbe46 100644
--- a/src/sdk/pynni/tests/test_compressor.py
+++ b/src/sdk/pynni/tests/test_compressor.py
@@ -114,7 +114,14 @@ def test_torch_pruner(self):
 
     def test_torch_quantizer(self):
         model = TorchMnist()
-        torch_compressor.NaiveQuantizer(model, [{'op_types': ['default']}]).compress()
+        configure_list = [{
+            'quant_types': ['weight'],
+            'quant_bits': {
+                'weight': 8,
+            },
+            'op_types':['Conv2d', 'Linear']
+        }]
+        torch_compressor.NaiveQuantizer(model, configure_list).compress()
 
 
 if __name__ == '__main__':

From 926c42dec29c9a70db5f8669aa0ed3ce4d22978c Mon Sep 17 00:00:00 2001
From: QuanluZhang <Quanlu.Zhang@microsoft.com>
Date: Wed, 13 Nov 2019 08:11:21 +0800
Subject: [PATCH 2/3] improve doc docstring of gridsearch/smac/ppo (#1693)

---
 docs/en_US/sdk_reference.rst                  |   7 +-
 .../pynni/nni/gridsearch_tuner/__init__.py    |   1 +
 .../nni/gridsearch_tuner/gridsearch_tuner.py  | 119 ++++++++---
 src/sdk/pynni/nni/ppo_tuner/__init__.py       |   1 +
 src/sdk/pynni/nni/ppo_tuner/distri.py         |   6 +-
 src/sdk/pynni/nni/ppo_tuner/model.py          |  25 +--
 src/sdk/pynni/nni/ppo_tuner/policy.py         |  60 ++++--
 src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py      | 197 ++++++++++++------
 src/sdk/pynni/nni/ppo_tuner/util.py           |  37 ++--
 .../nni/smac_tuner/convert_ss_to_scenario.py  |  15 +-
 src/sdk/pynni/nni/smac_tuner/smac_tuner.py    | 109 ++++++----
 src/sdk/pynni/nni/tuner.py                    |   7 +-
 12 files changed, 400 insertions(+), 184 deletions(-)

diff --git a/docs/en_US/sdk_reference.rst b/docs/en_US/sdk_reference.rst
index de274fabec..6b4d6d8d79 100644
--- a/docs/en_US/sdk_reference.rst
+++ b/docs/en_US/sdk_reference.rst
@@ -24,10 +24,10 @@ Tuner
 ..  autoclass:: nni.evolution_tuner.evolution_tuner.EvolutionTuner
     :members:
 
-..  autoclass:: nni.smac_tuner.smac_tuner.SMACTuner
+..  autoclass:: nni.smac_tuner.SMACTuner
     :members:
 
-..  autoclass:: nni.gridsearch_tuner.gridsearch_tuner.GridSearchTuner
+..  autoclass:: nni.gridsearch_tuner.GridSearchTuner
     :members:
 
 ..  autoclass:: nni.networkmorphism_tuner.networkmorphism_tuner.NetworkMorphismTuner
@@ -36,6 +36,9 @@ Tuner
 ..  autoclass:: nni.metis_tuner.metis_tuner.MetisTuner
     :members:
 
+..  autoclass:: nni.ppo_tuner.PPOTuner
+    :members:
+
 ..  autoclass:: nni.batch_tuner.batch_tuner.BatchTuner
     :members:
 
diff --git a/src/sdk/pynni/nni/gridsearch_tuner/__init__.py b/src/sdk/pynni/nni/gridsearch_tuner/__init__.py
index e69de29bb2..705d242e98 100644
--- a/src/sdk/pynni/nni/gridsearch_tuner/__init__.py
+++ b/src/sdk/pynni/nni/gridsearch_tuner/__init__.py
@@ -0,0 +1 @@
+from .gridsearch_tuner import GridSearchTuner
\ No newline at end of file
diff --git a/src/sdk/pynni/nni/gridsearch_tuner/gridsearch_tuner.py b/src/sdk/pynni/nni/gridsearch_tuner/gridsearch_tuner.py
index ebe09b1cf3..7a8cd49dc7 100644
--- a/src/sdk/pynni/nni/gridsearch_tuner/gridsearch_tuner.py
+++ b/src/sdk/pynni/nni/gridsearch_tuner/gridsearch_tuner.py
@@ -17,10 +17,10 @@
 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-'''
+"""
 gridsearch_tuner.py including:
     class GridSearchTuner
-'''
+"""
 
 import copy
 import logging
@@ -37,29 +37,40 @@ class GridSearchTuner
 logger = logging.getLogger('grid_search_AutoML')
 
 class GridSearchTuner(Tuner):
-    '''
+    """
     GridSearchTuner will search all the possible configures that the user define in the searchSpace.
-    The only acceptable types of search space are 'choice', 'quniform', 'randint'
+    The only acceptable types of search space are ``choice``, ``quniform``, ``randint``
 
-    Type 'choice' will select one of the options. Note that it can also be nested.
+    Type ``choice`` will select one of the options. Note that it can also be nested.
 
-    Type 'quniform' will receive three values [low, high, q], where [low, high] specifies a range and 'q' specifies the interval
-    It will be sampled in a way that the first sampled value is 'low',
+    Type ``quniform`` will receive three values [``low``, ``high``, ``q``],
+    where [``low``, ``high``] specifies a range and ``q`` specifies the interval.
+    It will be sampled in a way that the first sampled value is ``low``,
     and each of the following values is 'interval' larger than the value in front of it.
 
-    Type 'randint' gives all possible intergers in range[low, high). Note that 'high' is not included.
-    '''
+    Type ``randint`` gives all possible integers in range[``low``, ``high``). Note that ``high`` is not included.
+    """
 
     def __init__(self):
         self.count = -1
         self.expanded_search_space = []
         self.supplement_data = dict()
 
-    def json2parameter(self, ss_spec):
-        '''
-        generate all possible configs for hyperparameters from hyperparameter space.
-        ss_spec: hyperparameter space
-        '''
+    def _json2parameter(self, ss_spec):
+        """
+        Generate all possible configs for hyperparameters from hyperparameter space.
+
+        Parameters
+        ----------
+        ss_spec : dict or list
+            Hyperparameter space or the ``_value`` of a hyperparameter
+
+        Returns
+        -------
+        list or dict
+            All the candidate choices of hyperparameters. For a single hyperparameter, chosen_params
+            is a list. For multiple hyperparameters (e.g., search space), chosen_params is a dict.
+        """
         if isinstance(ss_spec, dict):
             if '_type' in ss_spec.keys():
                 _type = ss_spec['_type']
@@ -67,7 +78,7 @@ def json2parameter(self, ss_spec):
                 chosen_params = list()
                 if _type == 'choice':
                     for value in _value:
-                        choice = self.json2parameter(value)
+                        choice = self._json2parameter(value)
                         if isinstance(choice, list):
                             chosen_params.extend(choice)
                         else:
@@ -81,12 +92,12 @@ def json2parameter(self, ss_spec):
             else:
                 chosen_params = dict()
                 for key in ss_spec.keys():
-                    chosen_params[key] = self.json2parameter(ss_spec[key])
-                return self.expand_parameters(chosen_params)
+                    chosen_params[key] = self._json2parameter(ss_spec[key])
+                return self._expand_parameters(chosen_params)
         elif isinstance(ss_spec, list):
             chosen_params = list()
             for subspec in ss_spec[1:]:
-                choice = self.json2parameter(subspec)
+                choice = self._json2parameter(subspec)
                 if isinstance(choice, list):
                     chosen_params.extend(choice)
                 else:
@@ -97,27 +108,39 @@ def json2parameter(self, ss_spec):
         return chosen_params
 
     def _parse_quniform(self, param_value):
-        '''parse type of quniform parameter and return a list'''
+        """
+        Parse type of quniform parameter and return a list
+        """
         low, high, q = param_value[0], param_value[1], param_value[2]
         return np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high)
 
     def _parse_randint(self, param_value):
-        '''parse type of randint parameter and return a list'''
+        """
+        Parse type of randint parameter and return a list
+        """
         return np.arange(param_value[0], param_value[1]).tolist()
 
-    def expand_parameters(self, para):
-        '''
+    def _expand_parameters(self, para):
+        """
         Enumerate all possible combinations of all parameters
-        para: {key1: [v11, v12, ...], key2: [v21, v22, ...], ...}
-        return: {{key1: v11, key2: v21, ...}, {key1: v11, key2: v22, ...}, ...}
-        '''
+
+        Parameters
+        ----------
+        para : dict
+            {key1: [v11, v12, ...], key2: [v21, v22, ...], ...}
+
+        Returns
+        -------
+        list
+            [{key1: v11, key2: v21, ...}, {key1: v11, key2: v22, ...}, ...]
+        """
         if len(para) == 1:
             for key, values in para.items():
                 return list(map(lambda v: {key: v}, values))
 
         key = list(para)[0]
         values = para.pop(key)
-        rest_para = self.expand_parameters(para)
+        rest_para = self._expand_parameters(para)
         ret_para = list()
         for val in values:
             for config in rest_para:
@@ -126,12 +149,37 @@ def expand_parameters(self, para):
         return ret_para
 
     def update_search_space(self, search_space):
-        '''
-        Check if the search space is valid and expand it: support only 'choice', 'quniform', randint'
-        '''
-        self.expanded_search_space = self.json2parameter(search_space)
+        """
+        Check if the search space is valid and expand it: support only ``choice``, ``quniform``, ``randint``.
+
+        Parameters
+        ----------
+        search_space : dict
+            The format could be referred to search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
+        """
+        self.expanded_search_space = self._json2parameter(search_space)
 
     def generate_parameters(self, parameter_id, **kwargs):
+        """
+        Generate parameters for one trial.
+
+        Parameters
+        ----------
+        parameter_id : int
+            The id for the generated hyperparameter
+        **kwargs
+            Not used
+
+        Returns
+        -------
+        dict
+            One configuration from the expanded search space.
+
+        Raises
+        ------
+        NoMoreTrialError
+            If all the configurations have been sent, raise :class:`~nni.NoMoreTrialError`.
+        """
         self.count += 1
         while self.count <= len(self.expanded_search_space) - 1:
             _params_tuple = convert_dict2tuple(self.expanded_search_space[self.count])
@@ -142,15 +190,20 @@ def generate_parameters(self, parameter_id, **kwargs):
         raise nni.NoMoreTrialError('no more parameters now.')
 
     def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
+        """
+        Receive a trial's final performance result reported through :func:`~nni.report_final_result` by the trial.
+        GridSearchTuner does not need trial's results.
+        """
         pass
 
     def import_data(self, data):
-        """Import additional data for tuning
+        """
+        Import additional data for tuning
 
         Parameters
         ----------
-        data:
-            a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
+        data : list
+            A list of dictionaries, each of which has at least two keys, ``parameter`` and ``value``
         """
         _completed_num = 0
         for trial_info in data:
diff --git a/src/sdk/pynni/nni/ppo_tuner/__init__.py b/src/sdk/pynni/nni/ppo_tuner/__init__.py
index e69de29bb2..ada7e57c23 100644
--- a/src/sdk/pynni/nni/ppo_tuner/__init__.py
+++ b/src/sdk/pynni/nni/ppo_tuner/__init__.py
@@ -0,0 +1 @@
+from .ppo_tuner import PPOTuner
diff --git a/src/sdk/pynni/nni/ppo_tuner/distri.py b/src/sdk/pynni/nni/ppo_tuner/distri.py
index 5f00843b3e..39f539c870 100644
--- a/src/sdk/pynni/nni/ppo_tuner/distri.py
+++ b/src/sdk/pynni/nni/ppo_tuner/distri.py
@@ -77,7 +77,7 @@ def sample_placeholder(self, prepend_shape, name=None):
 
 class CategoricalPd(Pd):
     """
-    categorical prossibility distribution
+    Categorical probability distribution
     """
     def __init__(self, logits, mask_npinf, nsteps, size, is_act_model):
         self.logits = logits
@@ -154,7 +154,7 @@ def fromflat(cls, flat):
 
 class CategoricalPdType(PdType):
     """
-    to create CategoricalPd
+    To create CategoricalPd
     """
     def __init__(self, ncat, nsteps, np_mask, is_act_model):
         self.ncat = ncat
@@ -180,7 +180,7 @@ def sample_dtype(self):
 
 def _matching_fc(tensor, name, size, nsteps, init_scale, init_bias, np_mask, is_act_model):
     """
-    add fc op, and add mask op when not in action mode
+    Add fc op, and add mask op when not in action mode
     """
     if tensor.shape[-1] == size:
         assert False
diff --git a/src/sdk/pynni/nni/ppo_tuner/model.py b/src/sdk/pynni/nni/ppo_tuner/model.py
index 330f10369d..e042eec3d9 100644
--- a/src/sdk/pynni/nni/ppo_tuner/model.py
+++ b/src/sdk/pynni/nni/ppo_tuner/model.py
@@ -28,21 +28,18 @@
 class Model:
     """
     We use this object to :
-    __init__:
-    - Creates the step_model
-    - Creates the train_model
+        __init__:
+            - Creates the step_model
+            - Creates the train_model
 
-    train():
-    - Make the training part (feedforward and retropropagation of gradients)
+        train():
+            - Make the training part (feedforward and backpropagation of gradients)
 
-    save/load():
-    - Save load the model
+        save/load():
+            - Save/load the model
     """
     def __init__(self, *, policy, nbatch_act, nbatch_train,
                  nsteps, ent_coef, vf_coef, max_grad_norm, microbatch_size=None, np_mask=None):
-        """
-        init
-        """
         self.sess = sess = get_session()
 
         with tf.variable_scope('ppo2_model', reuse=tf.AUTO_REUSE):
@@ -137,9 +134,13 @@ def __init__(self, *, policy, nbatch_act, nbatch_train,
 
     def train(self, lr, cliprange, obs, returns, masks, actions, values, neglogpacs, states=None):
         """
-        train the model.
+        Train the model.
         Here we calculate advantage A(s,a) = R + yV(s') - V(s)
-        Returns = R + yV(s')
+
+        Notes
+        -----
+        The ``returns`` argument is R + yV(s'), so the advantage is
+        computed as ``returns - values``.
         """
         advs = returns - values
 
diff --git a/src/sdk/pynni/nni/ppo_tuner/policy.py b/src/sdk/pynni/nni/ppo_tuner/policy.py
index 980959a49e..309a60b46f 100644
--- a/src/sdk/pynni/nni/ppo_tuner/policy.py
+++ b/src/sdk/pynni/nni/ppo_tuner/policy.py
@@ -34,14 +34,20 @@ class PolicyWithValue:
 
     def __init__(self, env, observations, latent, estimate_q=False, vf_latent=None, sess=None, np_mask=None, is_act_model=False, **tensors):
         """
-        Parameters:
+        Parameters
         ----------
-        env:             RL environment
-        observations:    tensorflow placeholder in which the observations will be fed
-        latent:          latent state from which policy distribution parameters should be inferred
-        vf_latent:       latent state from which value function should be inferred (if None, then latent is used)
-        sess:            tensorflow session to run calculations in (if None, default session is used)
-        **tensors:       tensorflow tensors for additional attributes such as state or mask
+        env : obj
+            RL environment
+        observations : tensorflow placeholder
+            Tensorflow placeholder in which the observations will be fed
+        latent : tensor
+            Latent state from which policy distribution parameters should be inferred
+        vf_latent : tensor
+            Latent state from which value function should be inferred (if None, then latent is used)
+        sess : tensorflow session
+            Tensorflow session to run calculations in (if None, default session is used)
+        **tensors
+            Tensorflow tensors for additional attributes such as state or mask
         """
 
         self.X = observations
@@ -138,12 +144,14 @@ def step(self, step, observation, **extra_feed):
         """
         Compute next action(s) given the observation(s)
 
-        Parameters:
+        Parameters
         ----------
-        observation:     observation data (either single or a batch)
-        **extra_feed:    additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
+        observation : np array
+            Observation data (either single or a batch)
+        **extra_feed
+            Additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
 
-        Returns:
+        Returns
         -------
         (action, value estimate, next state, negative log likelihood of the action under current policy parameters) tuple
         """
@@ -157,22 +165,40 @@ def value(self, ob, *args, **kwargs):
         """
         Compute value estimate(s) given the observation(s)
 
-        Parameters:
+        Parameters
         ----------
-        observation:     observation data (either single or a batch)
-        **extra_feed:    additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
+        ob : np array
+            Observation data (either single or a batch)
+        *args, **kwargs
+            Additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)
 
-        Returns:
+        Returns
         -------
-        value estimate
+        Value estimate
         """
         return self._evaluate(self.vf, ob, *args, **kwargs)
 
 
 def build_lstm_policy(model_config, value_network=None, estimate_q=False, **policy_kwargs):
     """
-    build lstm policy and value network, they share the same lstm network.
+    Build lstm policy and value network, they share the same lstm network.
     the parameters all use their default values.
+
+    Parameters
+    ----------
+    model_config : obj
+        Configurations of the model
+    value_network : obj
+        The network for value function
+    estimate_q : bool
+        Whether to estimate ``q``
+    **policy_kwargs
+        The kwargs for policy network, i.e., lstm model
+
+    Returns
+    -------
+    func
+        The policy network
     """
     policy_network = lstm_model(**policy_kwargs)
 
diff --git a/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py b/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py
index 4b5009d45d..ef57148d5e 100644
--- a/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py
+++ b/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py
@@ -38,8 +38,10 @@ class PPOTuner
 
 logger = logging.getLogger('ppo_tuner_AutoML')
 
-def constfn(val):
-    """wrap as function"""
+def _constfn(val):
+    """
+    Wrap as function
+    """
     def f(_):
         return val
     return f
@@ -90,7 +92,7 @@ def __init__(self, obs, actions, values, neglogpacs, dones, last_value, inf_batc
 
     def get_next(self):
         """
-        get actions of the next trial
+        Get actions of the next trial
         """
         if self.iter >= self.inf_batch_size:
             return None, None
@@ -102,14 +104,14 @@ def get_next(self):
 
     def update_rewards(self, rewards, returns):
         """
-        after the trial is finished, reward and return of this trial is updated
+        After the trial is finished, reward and return of this trial are updated
         """
         self.rewards = rewards
         self.returns = returns
 
     def convert_shape(self):
         """
-        convert shape
+        Convert shape
         """
         def sf01(arr):
             """
@@ -138,9 +140,9 @@ def __init__(self, model_config, mask):
 
         set_global_seeds(None)
         assert isinstance(self.model_config.lr, float)
-        self.lr = constfn(self.model_config.lr)
+        self.lr = _constfn(self.model_config.lr)
         assert isinstance(self.model_config.cliprange, float)
-        self.cliprange = constfn(self.model_config.cliprange)
+        self.cliprange = _constfn(self.model_config.cliprange)
 
         # build lstm policy network, value share the same network
         policy = build_lstm_policy(model_config)
@@ -165,12 +167,28 @@ def __init__(self, model_config, mask):
 
     def inference(self, num):
         """
-        generate actions along with related info from policy network.
+        Generate actions along with related info from policy network.
         observation is the action of the last step.
 
-        Parameters:
+        Parameters
         ----------
-        num:             the number of trials to generate
+        num : int
+            The number of trials to generate
+
+        Returns
+        -------
+        mb_obs : list
+            Observation of the ``num`` configurations
+        mb_actions : list
+            Actions of the ``num`` configurations
+        mb_values : list
+            Values from the value function of the ``num`` configurations
+        mb_neglogpacs : list
+            ``neglogp`` of the ``num`` configurations
+        mb_dones : list
+            To show whether the play is done, always ``True``
+        last_values : tensorflow tensor
+            The last values of the ``num`` configurations, got with session run
         """
         # Here, we init the lists that will contain the mb of experiences
         mb_obs, mb_actions, mb_values, mb_dones, mb_neglogpacs = [], [], [], [], []
@@ -212,13 +230,15 @@ def inference(self, num):
 
     def compute_rewards(self, trials_info, trials_result):
         """
-        compute the rewards of the trials in trials_info based on trials_result,
+        Compute the rewards of the trials in trials_info based on trials_result,
         and update the rewards in trials_info
 
-        Parameters:
+        Parameters
         ----------
-        trials_info:             info of the generated trials
-        trials_result:           final results (e.g., acc) of the generated trials
+        trials_info : TrialsInfo
+            Info of the generated trials
+        trials_result : list
+            Final results (e.g., acc) of the generated trials
         """
         mb_rewards = np.asarray([trials_result for _ in trials_info.actions], dtype=np.float32)
         # discount/bootstrap off value fn
@@ -243,12 +263,14 @@ def compute_rewards(self, trials_info, trials_result):
 
     def train(self, trials_info, nenvs):
         """
-        train the policy/value network using trials_info
+        Train the policy/value network using trials_info
 
-        Parameters:
+        Parameters
         ----------
-        trials_info:             complete info of the generated trials from the previous inference
-        nenvs:                   the batch size of the (previous) inference
+        trials_info : TrialsInfo
+            Complete info of the generated trials from the previous inference
+        nenvs : int
+            The batch size of the (previous) inference
         """
         # keep frac decay for future optimization
         if self.cur_update <= self.nupdates:
@@ -282,27 +304,40 @@ def train(self, trials_info, nenvs):
 
 class PPOTuner(Tuner):
     """
-    PPOTuner
+    PPOTuner, the implementation inherits the main logic of the implementation
+    [ppo2 from openai](https://github.com/openai/baselines/tree/master/baselines/ppo2), and is adapted for NAS scenario.
+    It uses ``lstm`` for its policy network and value network, policy and value share the same network.
     """
 
     def __init__(self, optimize_mode, trials_per_update=20, epochs_per_update=4, minibatch_size=4,
                  ent_coef=0.0, lr=3e-4, vf_coef=0.5, max_grad_norm=0.5, gamma=0.99, lam=0.95, cliprange=0.2):
         """
-        initialization, PPO model is not initialized here as search space is not received yet.
+        Initialization, PPO model is not initialized here as search space is not received yet.
 
-        Parameters:
+        Parameters
         ----------
-        optimize_mode:         maximize or minimize
-        trials_per_update:     number of trials to have for each model update
-        epochs_per_update:     number of epochs to run for each model update
-        minibatch_size:        minibatch size (number of trials) for the update
-        ent_coef:              policy entropy coefficient in the optimization objective
-        lr:                    learning rate of the model (lstm network), constant
-        vf_coef:               value function loss coefficient in the optimization objective
-        max_grad_norm:         gradient norm clipping coefficient
-        gamma:                 discounting factor
-        lam:                   advantage estimation discounting factor (lambda in the paper)
-        cliprange:             cliprange in the PPO algorithm, constant
+        optimize_mode : str
+            maximize or minimize
+        trials_per_update : int
+            Number of trials to have for each model update
+        epochs_per_update : int
+            Number of epochs to run for each model update
+        minibatch_size : int
+            Minibatch size (number of trials) for the update
+        ent_coef : float
+            Policy entropy coefficient in the optimization objective
+        lr : float
+            Learning rate of the model (lstm network), constant
+        vf_coef : float
+            Value function loss coefficient in the optimization objective
+        max_grad_norm : float
+            Gradient norm clipping coefficient
+        gamma : float
+            Discounting factor
+        lam : float
+            Advantage estimation discounting factor (lambda in the paper)
+        cliprange : float
+            Cliprange in the PPO algorithm, constant
         """
         self.optimize_mode = OptimizeMode(optimize_mode)
         self.model_config = ModelConfig()
@@ -330,21 +365,25 @@ def __init__(self, optimize_mode, trials_per_update=20, epochs_per_update=4, min
         self.model_config.nminibatches = minibatch_size
 
         self.send_trial_callback = None
-        logger.info('=== finished PPOTuner initialization')
+        logger.info('Finished PPOTuner initialization')
 
     def _process_one_nas_space(self, block_name, block_space):
         """
-        process nas space to determine observation space and action space
+        Process nas space to determine observation space and action space
 
-        Parameters:
+        Parameters
         ----------
-        block_name:              the name of the mutable block
-        block_space:             search space of this mutable block
+        block_name : str
+            The name of the mutable block
+        block_space : dict
+            Search space of this mutable block
 
-        Returns:
-        ----------
-        actions_spaces:          list of the space of each action
-        actions_to_config:       the mapping from action to generated configuration
+        Returns
+        -------
+        actions_spaces : list
+            List of the space of each action
+        actions_to_config : list
+            The mapping from action to generated configuration
         """
         actions_spaces = []
         actions_to_config = []
@@ -385,7 +424,7 @@ def _process_one_nas_space(self, block_name, block_space):
 
     def _process_nas_space(self, search_space):
         """
-        process nas search space to get action/observation space
+        Process nas search space to get action/observation space
         """
         actions_spaces = []
         actions_to_config = []
@@ -412,7 +451,7 @@ def _process_nas_space(self, search_space):
 
     def _generate_action_mask(self):
         """
-        different step could have different action space. to deal with this case, we merge all the
+        Different steps could have different action spaces. To deal with this case, we merge all the
         possible actions into one action space, and use mask to indicate available actions for each step
         """
         two_masks = []
@@ -439,15 +478,13 @@ def _generate_action_mask(self):
 
     def update_search_space(self, search_space):
         """
-        get search space, currently the space only includes that for NAS
+        Get search space, currently the space only includes that for NAS
 
-        Parameters:
+        Parameters
         ----------
-        search_space:                  search space for NAS
-
-        Returns:
-        -------
-        no return
+        search_space : dict
+            Search space for NAS.
+            The format is described in the search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
         """
         logger.info('=== update search space %s', search_space)
         assert self.search_space is None
@@ -470,7 +507,7 @@ def update_search_space(self, search_space):
 
     def _actions_to_config(self, actions):
         """
-        given actions, to generate the corresponding trial configuration
+        Given actions, generate the corresponding trial configuration
         """
         chosen_arch = copy.deepcopy(self.chosen_arch_template)
         for cnt, act in enumerate(actions):
@@ -490,6 +527,19 @@ def _actions_to_config(self, actions):
     def generate_multiple_parameters(self, parameter_id_list, **kwargs):
         """
         Returns multiple sets of trial (hyper-)parameters, as iterable of serializable objects.
+
+        Parameters
+        ----------
+        parameter_id_list : list of int
+            Unique identifiers for each set of requested hyper-parameters.
+            These will later be used in :meth:`receive_trial_result`.
+        **kwargs
+            Not used
+
+        Returns
+        -------
+        list
+            A list of newly generated configurations
         """
         result = []
         self.send_trial_callback = kwargs['st_callback']
@@ -506,7 +556,17 @@ def generate_multiple_parameters(self, parameter_id_list, **kwargs):
 
     def generate_parameters(self, parameter_id, **kwargs):
         """
-        generate parameters, if no trial configration for now, self.credit plus 1 to send the config later
+        Generate parameters, if no trial configuration for now, self.credit plus 1 to send the config later
+
+        parameter_id : int
+            Unique identifier for requested hyper-parameters. This will later be used in :meth:`receive_trial_result`.
+        **kwargs
+            Not used
+
+        Returns
+        -------
+        dict
+            One newly generated configuration
         """
         if self.first_inf:
             self.trials_result = [None for _ in range(self.inf_batch_size)]
@@ -527,6 +587,7 @@ def generate_parameters(self, parameter_id, **kwargs):
 
     def _next_round_inference(self):
         """
+        Run an inference to generate the next batch of configurations
         """
         self.finished_trials = 0
         self.model.compute_rewards(self.trials_info, self.trials_result)
@@ -554,8 +615,17 @@ def _next_round_inference(self):
 
     def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
         """
-        receive trial's result. if the number of finished trials equals self.inf_batch_size, start the next update to
-        train the model
+        Receive trial's result. If the number of finished trials equals self.inf_batch_size, start the next update to
+        train the model.
+
+        Parameters
+        ----------
+        parameter_id : int
+            Unique identifier of used hyper-parameters, same with :meth:`generate_parameters`.
+        parameters : dict
+            Hyper-parameters generated by :meth:`generate_parameters`.
+        value : dict
+            Result from trial (the return value of :func:`nni.report_final_result`).
         """
         trial_info_idx = self.running_trials.pop(parameter_id, None)
         assert trial_info_idx is not None
@@ -572,7 +642,17 @@ def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
 
     def trial_end(self, parameter_id, success, **kwargs):
         """
-        to deal with trial failure
+        To deal with trial failure. If a trial fails, it is popped out from ``self.running_trials``,
+        and the final result of this trial is assigned with the average of the finished trials.
+
+        Parameters
+        ----------
+        parameter_id : int
+            Unique identifier for hyper-parameters used by this trial.
+        success : bool
+            True if the trial successfully completed; False if failed or terminated.
+        **kwargs
+            Not used
         """
         if not success:
             if parameter_id not in self.running_trials:
@@ -582,7 +662,7 @@ def trial_end(self, parameter_id, success, **kwargs):
             assert trial_info_idx is not None
             # use mean of finished trials as the result of this failed trial
             values = [val for val in self.trials_result if val is not None]
-            logger.warning('zql values: %s', values)
+            logger.warning('In trial_end, values: %s', values)
             self.trials_result[trial_info_idx] = (sum(values) / len(values)) if values else 0
             self.finished_trials += 1
             if self.finished_trials == self.inf_batch_size:
@@ -590,10 +670,11 @@ def trial_end(self, parameter_id, success, **kwargs):
 
     def import_data(self, data):
         """
-        Import additional data for tuning
+        Import additional data for tuning, not supported yet.
 
         Parameters
         ----------
-        data:               a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
+        data : list
+            A list of dictionaries, each of which has at least two keys, ``parameter`` and ``value``
         """
         logger.warning('PPOTuner cannot leverage imported data.')
diff --git a/src/sdk/pynni/nni/ppo_tuner/util.py b/src/sdk/pynni/nni/ppo_tuner/util.py
index acf704accc..5e553045d7 100644
--- a/src/sdk/pynni/nni/ppo_tuner/util.py
+++ b/src/sdk/pynni/nni/ppo_tuner/util.py
@@ -94,12 +94,14 @@ def lstm_model(nlstm=128, layer_norm=False):
 
     An example of usage of lstm-based policy can be found here: common/tests/test_doc_examples.py/test_lstm_example
 
-    Parameters:
+    Parameters
     ----------
-    nlstm: int          LSTM hidden state size
-    layer_norm: bool    if True, layer-normalized version of LSTM is used
+    nlstm : int
+        LSTM hidden state size
+    layer_norm : bool
+        if True, layer-normalized version of LSTM is used
 
-    Returns:
+    Returns
     -------
     function that builds LSTM with a given input tensor / placeholder
     """
@@ -171,11 +173,15 @@ def adjust_shape(placeholder, data):
     adjust shape of the data to the shape of the placeholder if possible.
     If shape is incompatible, AssertionError is thrown
 
-    Parameters:
-    placeholder:     tensorflow input placeholder
-    data:            input data to be (potentially) reshaped to be fed into placeholder
+    Parameters
+    ----------
+    placeholder
+        tensorflow input placeholder
+    data
+        input data to be (potentially) reshaped to be fed into placeholder
 
-    Returns:
+    Returns
+    -------
     reshaped data
     """
     if not isinstance(data, np.ndarray) and not isinstance(data, list):
@@ -230,13 +236,16 @@ def observation_placeholder(ob_space, batch_size=None, name='Ob'):
     """
     Create placeholder to feed observations into of the size appropriate to the observation space
 
-    Parameters:
+    Parameters
     ----------
-    ob_space: gym.Space     observation space
-    batch_size: int         size of the batch to be fed into input. Can be left None in most cases.
-    name: str               name of the placeholder
-
-    Returns:
+    ob_space : gym.Space
+        observation space
+    batch_size : int
+        size of the batch to be fed into input. Can be left None in most cases.
+    name : str
+        name of the placeholder
+
+    Returns
     -------
     tensorflow placeholder tensor
     """
diff --git a/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py b/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py
index 527bd41d34..e61b9fe8d0 100644
--- a/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py
+++ b/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py
@@ -24,11 +24,14 @@
 
 
 def get_json_content(file_path):
-    """Load json file content
+    """
+    Load json file content
+
     Parameters
     ----------
     file_path:
         path to the file
+
     Raises
     ------
     TypeError
@@ -43,7 +46,8 @@ def get_json_content(file_path):
 
 
 def generate_pcs(nni_search_space_content):
-    """Generate the Parameter Configuration Space (PCS) which defines the
+    """
+    Generate the Parameter Configuration Space (PCS) which defines the
     legal ranges of the parameters to be optimized and their default values.
     Generally, the format is:
     # parameter_name categorical {value_1, ..., value_N} [default value]
@@ -53,14 +57,17 @@ def generate_pcs(nni_search_space_content):
     # parameter_name real [min_value, max_value] [default value]
     # parameter_name real [min_value, max_value] [default value] log
     Reference: https://automl.github.io/SMAC3/stable/options.html
+
     Parameters
     ----------
     nni_search_space_content: search_space
         The search space in this experiment in nni
+
     Returns
     -------
     Parameter Configuration Space (PCS)
         the legal ranges of the parameters to be optimized and their default values
+
     Raises
     ------
     RuntimeError
@@ -122,7 +129,8 @@ def dump_categorical(fd, key, categories):
 
 
 def generate_scenario(ss_content):
-    """Generate the scenario. The scenario-object (smac.scenario.scenario.Scenario) is used to configure SMAC and
+    """
+    Generate the scenario. The scenario-object (smac.scenario.scenario.Scenario) is used to configure SMAC and
     can be constructed either by providing an actual scenario-object, or by specifing the options in a scenario file.
     Reference: https://automl.github.io/SMAC3/stable/options.html
     The format of the scenario file is one option per line:
@@ -191,6 +199,7 @@ def generate_scenario(ss_content):
     wallclock_limit: int
         Maximum amount of wallclock-time used for optimization. Default: inf.
         Use default because this is controlled by nni
+
     Returns
     -------
     Scenario:
diff --git a/src/sdk/pynni/nni/smac_tuner/smac_tuner.py b/src/sdk/pynni/nni/smac_tuner/smac_tuner.py
index 4e2f876b9e..eed1b27946 100644
--- a/src/sdk/pynni/nni/smac_tuner/smac_tuner.py
+++ b/src/sdk/pynni/nni/smac_tuner/smac_tuner.py
@@ -41,13 +41,17 @@
 
 class SMACTuner(Tuner):
     """
-    Parameters
-    ----------
-    optimize_mode: str
-        optimize mode, 'maximize' or 'minimize', by default 'maximize'
+    This is a wrapper of [SMAC](https://github.com/automl/SMAC3) following NNI tuner interface.
+    It only supports ``SMAC`` mode, and does not support the multiple instances of SMAC3 (i.e.,
+    the same configuration is run multiple times).
     """
     def __init__(self, optimize_mode="maximize"):
-        """Constructor"""
+        """
+        Parameters
+        ----------
+        optimize_mode : str
+            Optimize mode, 'maximize' or 'minimize', by default 'maximize'
+        """
         self.logger = logging.getLogger(
             self.__module__ + "." + self.__class__.__name__)
         self.optimize_mode = OptimizeMode(optimize_mode)
@@ -61,11 +65,14 @@ def __init__(self, optimize_mode="maximize"):
         self.cs = None
 
     def _main_cli(self):
-        """Main function of SMAC for CLI interface
+        """
+        Main function of SMAC for CLI interface. Some initializations of the wrapped SMAC are done
+        in this function.
+
         Returns
         -------
-        instance
-            optimizer
+        obj
+            The object of the SMAC optimizer
         """
         self.logger.info("SMAC call: %s", " ".join(sys.argv))
 
@@ -126,20 +133,23 @@ def _main_cli(self):
 
     def update_search_space(self, search_space):
         """
-        NOTE: updating search space is not supported.
+        Convert search_space to the format that ``SMAC3`` could recognize, thus, not all the search space types
+        are supported. In this function, we also do the initialization of ``SMAC3``, i.e., calling ``self._main_cli``.
+
+        NOTE: updating search space during experiment running is not supported.
+
         Parameters
         ----------
-        search_space: dict
-            search space
+        search_space : dict
+            The format could be referred to search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
         """
 
-        # TODO: this is ugly, we put all the initialization work in this method, because initialization relies
-        #         on search space, also because update_search_space is called at the beginning.
-
         if not self.update_ss_done:
             self.categorical_dict = generate_scenario(search_space)
             if self.categorical_dict is None:
                 raise RuntimeError('categorical dict is not correctly returned after parsing search space.')
+            # TODO: this is ugly, we put all the initialization work in this method, because initialization relies
+            #         on search space, also because update_search_space is called at the beginning.
             self.optimizer = self._main_cli()
             self.smbo_solver = self.optimizer.solver
             self.loguniform_key = {key for key in search_space.keys() if search_space[key]['_type'] == 'loguniform'}
@@ -148,19 +158,23 @@ def update_search_space(self, search_space):
             self.logger.warning('update search space is not supported.')
 
     def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
-        """receive_trial_result
+        """
+        Receive a trial's final performance result reported through :func:`nni.report_final_result` by the trial.
+        The scalar reward is extracted from ``value`` with ``extract_scalar_reward``.
+
         Parameters
         ----------
-        parameter_id: int
-            parameter id
-        parameters:
-            parameters
-        value:
-            value
+        parameter_id : int
+            Unique identifier of used hyper-parameters, same with :meth:`generate_parameters`.
+        parameters : dict
+            Hyper-parameters generated by :meth:`generate_parameters`.
+        value : dict
+            Result from trial (the return value of :func:`nni.report_final_result`).
+
         Raises
         ------
         RuntimeError
-            Received parameter id not in total_data
+            Received parameter id not in ``self.total_data``
         """
         reward = extract_scalar_reward(value)
         if self.optimize_mode is OptimizeMode.Maximize:
@@ -176,14 +190,16 @@ def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
 
     def param_postprocess(self, challenger_dict):
         """
-        Postprocessing for a set of parameter includes:
-        1. Convert the values of type `loguniform` back to their initial range.
-        2. Convert categorical: categorical values in search space are changed to list of numbers before,
-        those original values will be changed back in this function.
+        Postprocessing for a set of hyperparameters includes:
+            1. Convert the values of type ``loguniform`` back to their initial range.
+            2. Convert ``categorical``: categorical values in search space are changed to list of numbers before,
+               those original values will be changed back in this function.
+
         Parameters
         ----------
-        challenger_dict: dict
+        challenger_dict : dict
             challenger dict
+
         Returns
         -------
         dict
@@ -203,15 +219,21 @@ def param_postprocess(self, challenger_dict):
         return converted_dict
 
     def generate_parameters(self, parameter_id, **kwargs):
-        """generate one instance of hyperparameters
+        """
+        Generate one instance of hyperparameters (i.e., one configuration).
+        Get one from SMAC3's ``challengers``.
+
         Parameters
         ----------
-        parameter_id: int
-            parameter id
+        parameter_id : int
+            Unique identifier for requested hyper-parameters. This will later be used in :meth:`receive_trial_result`.
+        **kwargs
+            Not used
+
         Returns
         -------
-        list
-            new generated parameters
+        dict
+            One newly generated configuration
         """
         if self.first_one:
             init_challenger = self.smbo_solver.nni_smac_start()
@@ -224,15 +246,23 @@ def generate_parameters(self, parameter_id, **kwargs):
                 return self.param_postprocess(challenger.get_dictionary())
 
     def generate_multiple_parameters(self, parameter_id_list, **kwargs):
-        """generate mutiple instances of hyperparameters
+        """
+        Generate multiple instances of hyperparameters. If it is a first request,
+        retrieve the instances from initial challengers. While if it is not, request
+        new challengers and retrieve instances from the requested challengers.
+
         Parameters
         ----------
-        parameter_id_list: list
-            list of parameter id
+        parameter_id_list : list of int
+            Unique identifiers for each set of requested hyper-parameters.
+            These will later be used in :meth:`receive_trial_result`.
+        **kwargs
+            Not used
+
         Returns
         -------
         list
-            list of new generated parameters
+            a list of newly generated configurations
         """
         if self.first_one:
             params = []
@@ -254,11 +284,12 @@ def generate_multiple_parameters(self, parameter_id_list, **kwargs):
 
     def import_data(self, data):
         """
-        Import additional data for tuning
+        Import additional data for tuning.
+
         Parameters
         ----------
-        data: list of dict
-            Each of which has at least two keys, `parameter` and `value`.
+        data : list of dict
+            Each of which has at least two keys, ``parameter`` and ``value``.
         """
         _completed_num = 0
         for trial_info in data:
diff --git a/src/sdk/pynni/nni/tuner.py b/src/sdk/pynni/nni/tuner.py
index a39ed9ff11..6a280a2856 100644
--- a/src/sdk/pynni/nni/tuner.py
+++ b/src/sdk/pynni/nni/tuner.py
@@ -76,10 +76,11 @@ class Tuner(Recoverable):
     Builtin tuners:
     :class:`~nni.hyperopt_tuner.hyperopt_tuner.HyperoptTuner`
     :class:`~nni.evolution_tuner.evolution_tuner.EvolutionTuner`
-    :class:`~nni.smac_tuner.smac_tuner.SMACTuner`
-    :class:`~nni.gridsearch_tuner.gridsearch_tuner.GridSearchTuner`
+    :class:`~nni.smac_tuner.SMACTuner`
+    :class:`~nni.gridsearch_tuner.GridSearchTuner`
     :class:`~nni.networkmorphism_tuner.networkmorphism_tuner.NetworkMorphismTuner`
-    :class:`~nni.metis_tuner.metis_tuner.MetisTuner`
+    :class:`~nni.metis_tuner.metis_tuner.MetisTuner`
+    :class:`~nni.ppo_tuner.PPOTuner`
     :class:`~nni.gp_tuner.gp_tuner.GPTuner`
     """
 

From 187494aafdda77bc22a5309c068528a132db26fc Mon Sep 17 00:00:00 2001
From: Yan Ni <yann@microsoft.com>
Date: Wed, 13 Nov 2019 08:15:03 +0800
Subject: [PATCH 3/3] update docstring doc (#1645)

---
 docs/en_US/Tutorial/Contributing.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/en_US/Tutorial/Contributing.md b/docs/en_US/Tutorial/Contributing.md
index acfd0a5068..2dea24bc6c 100644
--- a/docs/en_US/Tutorial/Contributing.md
+++ b/docs/en_US/Tutorial/Contributing.md
@@ -40,6 +40,9 @@ A person looking to contribute can take up an issue by claiming it as a comment/
 ## Code Styles & Naming Conventions
 * We follow [PEP8](https://www.python.org/dev/peps/pep-0008/) for Python code and naming conventions, do try to adhere to the same when making a pull request or making a change. One can also take the help of linters such as `flake8` or `pylint`
 * We also follow [NumPy Docstring Style](https://www.sphinx-doc.org/en/master/usage/extensions/example_numpy.html#example-numpy) for Python Docstring Conventions. During the [documentation building](Contributing.md#documentation), we use [sphinx.ext.napoleon](https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html) to generate Python API documentation from Docstring.
+* For docstrings, please refer to [numpydoc docstring guide](https://numpydoc.readthedocs.io/en/latest/format.html) and [pandas docstring guide](https://python-sprints.github.io/pandas/guide/pandas_docstring.html)
+    * For function docstring, **description**, **Parameters**, and **Returns**/**Yields** are mandatory.
+    * For class docstring, **description**, **Attributes** are mandatory.
 
 ## Documentation
 Our documentation is built with [sphinx](http://sphinx-doc.org/), supporting [Markdown](https://guides.github.com/features/mastering-markdown/) and [reStructuredText](http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html) format. All our documentations are placed under [docs/en_US](https://github.com/Microsoft/nni/tree/master/docs).
@@ -48,4 +51,4 @@ Our documentation is built with [sphinx](http://sphinx-doc.org/), supporting [Ma
 
 * For links, please consider using __relative paths__ first. However, if the documentation is written in Markdown format, and:
     * It's an image link which needs to be formatted with embedded html grammar, please use global URL like `https://user-images.githubusercontent.com/44491713/51381727-e3d0f780-1b4f-11e9-96ab-d26b9198ba65.png`, which can be automatically generated by dragging picture onto [Github Issue](https://github.com/Microsoft/nni/issues/new) Box.
-    * It cannot be re-formatted by sphinx, such as source code, please use its global URL. For source code that links to our github repo, please use URLs rooted at `https://github.com/Microsoft/nni/tree/master/` ([mnist.py](https://github.com/Microsoft/nni/blob/master/examples/trials/mnist/mnist.py) for example).
\ No newline at end of file
+    * It cannot be re-formatted by sphinx, such as source code, please use its global URL. For source code that links to our github repo, please use URLs rooted at `https://github.com/Microsoft/nni/tree/master/` ([mnist.py](https://github.com/Microsoft/nni/blob/master/examples/trials/mnist/mnist.py) for example).