microsoft · chicm-ms · Oct 31, 2019 · Oct 28, 2019 · Oct 31, 2019
diff --git a/src/sdk/pynni/nni/ppo_tuner/distri.py b/src/sdk/pynni/nni/ppo_tuner/distri.py
@@ -143,14 +143,14 @@ def sample(self):
             re_masked_res = tf.reshape(masked_res, [-1, self.size])
 
             u = tf.random_uniform(tf.shape(re_masked_res), dtype=self.logits.dtype)
-            return tf.argmax(re_masked_res - tf.log(-tf.log(u)), axis=-1)
+            return tf.argmax(re_masked_res - tf.log(-1*tf.log(u)), axis=-1)
         else:
             u = tf.random_uniform(tf.shape(self.logits), dtype=self.logits.dtype)
-            return tf.argmax(self.logits - tf.log(-tf.log(u)), axis=-1)
+            return tf.argmax(self.logits - tf.log(-1*tf.log(u)), axis=-1)
 
     @classmethod
     def fromflat(cls, flat):
-        return cls(flat)
+        return cls(flat) # pylint: disable=no-value-for-parameter
 
 class CategoricalPdType(PdType):
     """

diff --git a/src/sdk/pynni/nni/ppo_tuner/policy.py b/src/sdk/pynni/nni/ppo_tuner/policy.py
@@ -107,7 +107,7 @@ def _build_model_for_step(self):
         def sample(logits, mask_npinf):
             new_logits = tf.math.add(logits, mask_npinf)
             u = tf.random_uniform(tf.shape(new_logits), dtype=logits.dtype)
-            return tf.argmax(new_logits - tf.log(-tf.log(u)), axis=-1)
+            return tf.argmax(new_logits - tf.log(-1*tf.log(u)), axis=-1)
 
         def neglogp(logits, x):
             # return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x)

diff --git a/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py b/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py
@@ -22,11 +22,9 @@
     class PPOTuner
 """
 
-import os
 import copy
 import logging
 import numpy as np
-import json_tricks
 from gym import spaces
 
 import nni
@@ -236,7 +234,8 @@ def compute_rewards(self, trials_info, trials_result):
                 nextnonterminal = 1.0 - trials_info.dones[t+1]
                 nextvalues = trials_info.values[t+1]
             delta = mb_rewards[t] + self.model_config.gamma * nextvalues * nextnonterminal - trials_info.values[t]
-            mb_advs[t] = lastgaelam = delta + self.model_config.gamma * self.model_config.lam * nextnonterminal * lastgaelam
+            lastgaelam = delta + self.model_config.gamma * self.model_config.lam * nextnonterminal * lastgaelam
+            mb_advs[t] = lastgaelam # pylint: disable=unsupported-assignment-operation
         mb_returns = mb_advs + trials_info.values
 
         trials_info.update_rewards(mb_rewards, mb_returns)
@@ -536,8 +535,10 @@ def _next_round_inference(self):
         # generate new trials
         self.trials_result = [None for _ in range(self.inf_batch_size)]
         mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values = self.model.inference(self.inf_batch_size)
-        self.trials_info = TrialsInfo(mb_obs, mb_actions, mb_values, mb_neglogpacs,
-                                        mb_dones, last_values, self.inf_batch_size)
+        self.trials_info = TrialsInfo(mb_obs, mb_actions,
+                                      mb_values, mb_neglogpacs,
+                                      mb_dones, last_values,
+                                      self.inf_batch_size)
         # check credit and submit new trials
         for _ in range(self.credit):
             trial_info_idx, actions = self.trials_info.get_next()
@@ -581,8 +582,8 @@ def trial_end(self, parameter_id, success, **kwargs):
             assert trial_info_idx is not None
             # use mean of finished trials as the result of this failed trial
             values = [val for val in self.trials_result if val is not None]
-            logger.warning('zql values: {0}'.format(values))
-            self.trials_result[trial_info_idx] = (sum(values) / len(values)) if len(values) > 0 else 0
+            logger.warning('zql values: %s', values)
+            self.trials_result[trial_info_idx] = (sum(values) / len(values)) if values else 0
             self.finished_trials += 1
             if self.finished_trials == self.inf_batch_size:
                 self._next_round_inference()

diff --git a/src/sdk/pynni/nni/ppo_tuner/util.py b/src/sdk/pynni/nni/ppo_tuner/util.py
@@ -56,7 +56,7 @@ def seq_to_batch(h, flat=False):
 
 def lstm(xs, ms, s, scope, nh, init_scale=1.0):
     """lstm cell"""
-    nbatch, nin = [v.value for v in xs[0].get_shape()]
+    _, nin = [v.value for v in xs[0].get_shape()] # first dimension (nbatch) is discarded
     with tf.variable_scope(scope):
         wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
         wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))

diff --git a/src/sdk/pynni/nni/smac_tuner/smac_tuner.py b/src/sdk/pynni/nni/smac_tuner/smac_tuner.py
@@ -39,7 +39,6 @@
 
 from .convert_ss_to_scenario import generate_scenario
 
-
 class SMACTuner(Tuner):
     """
     Parameters
-Original file line number
+Diff line change
@@ Expand Up / @@ -39,7 +39,6 @@ @@
     from .convert_ss_to_scenario import generate_scenario
     class SMACTuner(Tuner):
         """
         Parameters
@@ Expand Down @@