Migrate lint to Python 3

dmlc · Apr 21, 2019 · commit 6ffa667
1 parent 711397d

Showing 11 changed files with 99 additions and 111 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -56,6 +56,8 @@ addons:
     packages:
       - clang
       - clang-tidy-5.0
+      - python3
+      - python3-pip
       - cmake-data
       - doxygen
       - wget

diff --git a/Makefile b/Makefile
@@ -173,10 +173,10 @@ xgboost: $(CLI_OBJ) $(ALL_DEP)
 	$(CXX) $(CFLAGS) -o $@  $(filter %.o %.a, $^)  $(LDFLAGS)
 
 rcpplint:
-	python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src
+	python3 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src
 
 lint: rcpplint
-	python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} include src plugin python-package
+	python3 dmlc-core/scripts/lint.py --pylint-rc ${PWD}/python-package/.pylintrc xgboost ${LINT_LANG} include src plugin python-package
 
 pylint:
 	flake8 --ignore E501 python-package

diff --git a/python-package/.pylintrc b/python-package/.pylintrc
@@ -4,7 +4,7 @@ ignore=tests
 
 extension-pkg-whitelist=numpy
 
-disiable=unexpected-special-method-signature,too-many-nested-blocks
+disable=unexpected-special-method-signature,too-many-nested-blocks,useless-object-inheritance
 
 dummy-variables-rgx=(unused|)_.*
 

diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py
@@ -1,5 +1,5 @@
 # coding: utf-8
-# pylint: disable= invalid-name
+# pylint: disable=invalid-name, too-many-statements
 """Training Library containing training routines."""
 from __future__ import absolute_import
 
@@ -20,13 +20,11 @@ def _fmt_metric(value, show_stdv=True):
     """format metric string"""
     if len(value) == 2:
         return '%s:%g' % (value[0], value[1])
-    elif len(value) == 3:
+    if len(value) == 3:
         if show_stdv:
             return '%s:%g+%g' % (value[0], value[1], value[2])
-        else:
-            return '%s:%g' % (value[0], value[1])
-    else:
-        raise ValueError("wrong metric value")
+        return '%s:%g' % (value[0], value[1])
+    raise ValueError("wrong metric value")
 
 
 def print_evaluation(period=1, show_stdv=True):
@@ -50,7 +48,7 @@ def print_evaluation(period=1, show_stdv=True):
     """
     def callback(env):
         """internal function"""
-        if env.rank != 0 or len(env.evaluation_result_list) == 0 or period is False or period == 0:
+        if env.rank != 0 or (not env.evaluation_result_list) or period is False or period == 0:
             return
         i = env.iteration
         if (i % period == 0 or i + 1 == env.begin_iteration or i + 1 == env.end_iteration):
@@ -89,7 +87,7 @@ def init(env):
 
     def callback(env):
         """internal function"""
-        if len(eval_result) == 0:
+        if not eval_result:
             init(env)
         for k, v in env.evaluation_result_list:
             pos = k.index('-')
@@ -182,14 +180,14 @@ def init(env):
         """internal function"""
         bst = env.model
 
-        if len(env.evaluation_result_list) == 0:
+        if not env.evaluation_result_list:
             raise ValueError('For early stopping you need at least one set in evals.')
         if len(env.evaluation_result_list) > 1 and verbose:
             msg = ("Multiple eval metrics have been passed: "
                    "'{0}' will be used for early stopping.\n\n")
             rabit.tracker_print(msg.format(env.evaluation_result_list[-1][0]))
         maximize_metrics = ('auc', 'aucpr', 'map', 'ndcg')
-        maximize_at_n_metrics = ('auc@', 'aucpr@' 'map@', 'ndcg@')
+        maximize_at_n_metrics = ('auc@', 'aucpr@', 'map@', 'ndcg@')
         maximize_score = maximize
         metric_label = env.evaluation_result_list[-1][0]
         metric = metric_label.split('-', 1)[-1]
@@ -225,7 +223,7 @@ def init(env):
     def callback(env):
         """internal function"""
         score = env.evaluation_result_list[-1][1]
-        if len(state) == 0:
+        if not state:
             init(env)
         best_score = state['best_score']
         best_iteration = state['best_iteration']

diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py
@@ -11,14 +11,13 @@
 
 if PY3:
     # pylint: disable=invalid-name, redefined-builtin
-    STRING_TYPES = str,
+    STRING_TYPES = (str,)
 
     def py_str(x):
         """convert c string back to python string"""
         return x.decode('utf-8')
 else:
-    # pylint: disable=invalid-name
-    STRING_TYPES = basestring,
+    STRING_TYPES = (basestring,)  # pylint: disable=undefined-variable
 
     def py_str(x):
         """convert c string back to python string"""
@@ -37,13 +36,13 @@ def py_str(x):
     PANDAS_INSTALLED = True
 except ImportError:
 
+    # pylint: disable=too-few-public-methods
     class MultiIndex(object):
         """ dummy for pandas.MultiIndex """
-        pass
 
+    # pylint: disable=too-few-public-methods
     class DataFrame(object):
         """ dummy for pandas.DataFrame """
-        pass
 
     PANDAS_INSTALLED = False
 
@@ -57,9 +56,9 @@ class DataFrame(object):
     DT_INSTALLED = True
 except ImportError:
 
+    # pylint: disable=too-few-public-methods
     class DataTable(object):
         """ dummy for datatable.DataTable """
-        pass
 
     DT_INSTALLED = False
 

diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py
@@ -1,6 +1,6 @@
 # coding: utf-8
 # pylint: disable=too-many-arguments, too-many-branches, invalid-name
-# pylint: disable=too-many-branches, too-many-lines, W0141
+# pylint: disable=too-many-branches, too-many-lines, too-many-locals
 """Core XGBoost Library."""
 from __future__ import absolute_import
 import collections
@@ -30,7 +30,6 @@
 
 class XGBoostError(Exception):
     """Error thrown by xgboost trainer."""
-    pass
 
 
 class EarlyStopException(Exception):
@@ -67,18 +66,16 @@ def from_pystr_to_cstr(data):
         list of str
     """
 
-    if isinstance(data, list):
-        pointers = (ctypes.c_char_p * len(data))()
-        if PY3:
-            data = [bytes(d, 'utf-8') for d in data]
-        else:
-            data = [d.encode('utf-8') if isinstance(d, unicode) else d
-                    for d in data]
-        pointers[:] = data
-        return pointers
-    else:
-        # copy from above when we actually use it
+    if not isinstance(data, list):
         raise NotImplementedError
+    pointers = (ctypes.c_char_p * len(data))()
+    if PY3:
+        data = [bytes(d, 'utf-8') for d in data]
+    else:
+        data = [d.encode('utf-8') if isinstance(d, unicode) else d  # pylint: disable=undefined-variable
+                for d in data]
+    pointers[:] = data
+    return pointers
 
 
 def from_cstr_to_pystr(data, length):
@@ -104,6 +101,7 @@ def from_cstr_to_pystr(data, length):
             try:
                 res.append(str(data[i].decode('ascii')))
             except UnicodeDecodeError:
+                # pylint: disable=undefined-variable
                 res.append(unicode(data[i].decode('utf-8')))
     return res
 
@@ -123,7 +121,7 @@ def _get_log_callback_func():
 def _load_lib():
     """Load xgboost Library."""
     lib_paths = find_lib_path()
-    if len(lib_paths) == 0:
+    if not lib_paths:
         return None
     try:
         pathBackup = os.environ['PATH'].split(os.pathsep)
@@ -267,8 +265,7 @@ def _maybe_pandas_label(label):
         label_dtypes = label.dtypes
         if not all(dtype.name in PANDAS_DTYPE_MAPPER for dtype in label_dtypes):
             raise ValueError('DataFrame.dtypes for label must be int, float or bool')
-        else:
-            label = label.values.astype('float')
+        label = label.values.astype('float')
     # pd.Series can be passed to xgb as it is
 
     return label
@@ -301,8 +298,7 @@ def _maybe_dt_data(data, feature_names, feature_types):
         # always return stypes for dt ingestion
         if feature_types is not None:
             raise ValueError('DataTable has own feature types, cannot pass them in')
-        else:
-            feature_types = np.vectorize(DT_TYPE_MAPPER2.get)(data_types_names)
+        feature_types = np.vectorize(DT_TYPE_MAPPER2.get)(data_types_names)
 
     return data, feature_names, feature_types
 
@@ -512,7 +508,7 @@ def _init_from_dt(self, data, nthread):
                 ptrs[icol] = ctypes.c_void_p(ptr)
         else:
             # datatable<=0.8.0
-            from datatable.internal import frame_column_data_r
+            from datatable.internal import frame_column_data_r  # pylint: disable=no-name-in-module,import-error
             for icol in range(data.ncols):
                 ptrs[icol] = frame_column_data_r(data, icol)
 
@@ -1039,8 +1035,7 @@ def attr(self, key):
             self.handle, c_str(key), ctypes.byref(ret), ctypes.byref(success)))
         if success.value != 0:
             return py_str(ret.value)
-        else:
-            return None
+        return None
 
     def attributes(self):
         """Get attributes stored in the Booster as a dictionary.
@@ -1056,8 +1051,7 @@ def attributes(self):
                                                ctypes.byref(length),
                                                ctypes.byref(sarr)))
         attr_names = from_cstr_to_pystr(sarr, length)
-        res = dict([(n, self.attr(n)) for n in attr_names])
-        return res
+        return {n: self.attr(n) for n in attr_names}
 
     def set_attr(self, **kwargs):
         """Set the attribute of the Booster.
@@ -1399,13 +1393,13 @@ def dump_model(self, fout, fmap='', with_stats=False, dump_format="text"):
         ret = self.get_dump(fmap, with_stats, dump_format)
         if dump_format == 'json':
             fout.write('[\n')
-            for i in range(len(ret)):
+            for i, _ in enumerate(ret):
                 fout.write(ret[i])
                 if i < len(ret) - 1:
                     fout.write(",\n")
             fout.write('\n]')
         else:
-            for i in range(len(ret)):
+            for i, _ in enumerate(ret):
                 fout.write('booster[{}]:\n'.format(i))
                 fout.write(ret[i])
         if need_close:
@@ -1538,51 +1532,50 @@ def get_score(self, fmap='', importance_type='weight'):
 
             return fmap
 
-        else:
-            average_over_splits = True
-            if importance_type == 'total_gain':
-                importance_type = 'gain'
-                average_over_splits = False
-            elif importance_type == 'total_cover':
-                importance_type = 'cover'
-                average_over_splits = False
+        average_over_splits = True
+        if importance_type == 'total_gain':
+            importance_type = 'gain'
+            average_over_splits = False
+        elif importance_type == 'total_cover':
+            importance_type = 'cover'
+            average_over_splits = False
 
-            trees = self.get_dump(fmap, with_stats=True)
+        trees = self.get_dump(fmap, with_stats=True)
 
-            importance_type += '='
-            fmap = {}
-            gmap = {}
-            for tree in trees:
-                for line in tree.split('\n'):
-                    # look for the opening square bracket
-                    arr = line.split('[')
-                    # if no opening bracket (leaf node), ignore this line
-                    if len(arr) == 1:
-                        continue
+        importance_type += '='
+        fmap = {}
+        gmap = {}
+        for tree in trees:
+            for line in tree.split('\n'):
+                # look for the opening square bracket
+                arr = line.split('[')
+                # if no opening bracket (leaf node), ignore this line
+                if len(arr) == 1:
+                    continue
 
-                    # look for the closing bracket, extract only info within that bracket
-                    fid = arr[1].split(']')
+                # look for the closing bracket, extract only info within that bracket
+                fid = arr[1].split(']')
 
-                    # extract gain or cover from string after closing bracket
-                    g = float(fid[1].split(importance_type)[1].split(',')[0])
+                # extract gain or cover from string after closing bracket
+                g = float(fid[1].split(importance_type)[1].split(',')[0])
 
-                    # extract feature name from string before closing bracket
-                    fid = fid[0].split('<')[0]
+                # extract feature name from string before closing bracket
+                fid = fid[0].split('<')[0]
 
-                    if fid not in fmap:
-                        # if the feature hasn't been seen yet
-                        fmap[fid] = 1
-                        gmap[fid] = g
-                    else:
-                        fmap[fid] += 1
-                        gmap[fid] += g
+                if fid not in fmap:
+                    # if the feature hasn't been seen yet
+                    fmap[fid] = 1
+                    gmap[fid] = g
+                else:
+                    fmap[fid] += 1
+                    gmap[fid] += g
 
-            # calculate average value (gain/cover) for each feature
-            if average_over_splits:
-                for fid in gmap:
-                    gmap[fid] = gmap[fid] / fmap[fid]
+        # calculate average value (gain/cover) for each feature
+        if average_over_splits:
+            for fid in gmap:
+                gmap[fid] = gmap[fid] / fmap[fid]
 
-            return gmap
+        return gmap
 
     def trees_to_dataframe(self, fmap=''):
         """Parse a boosted tree model text dump into a pandas DataFrame structure.
@@ -1721,7 +1714,7 @@ def get_split_value_histogram(self, feature, fmap='', bins=None, as_pandas=True)
         xgdump = self.get_dump(fmap=fmap)
         values = []
         regexp = re.compile(r"\[{0}<([\d.Ee+-]+)\]".format(feature))
-        for i in range(len(xgdump)):
+        for i, _ in enumerate(xgdump):
             m = re.findall(regexp, xgdump[i])
             values.extend(map(float, m))
 
@@ -1734,9 +1727,7 @@ def get_split_value_histogram(self, feature, fmap='', bins=None, as_pandas=True)
 
         if as_pandas and PANDAS_INSTALLED:
             return DataFrame(nph, columns=['SplitValue', 'Count'])
-        elif as_pandas and not PANDAS_INSTALLED:
+        if as_pandas and not PANDAS_INSTALLED:
             sys.stderr.write(
                 "Returning histogram as ndarray (as_pandas == True, but pandas is not installed).")
-            return nph
-        else:
-            return nph
+        return nph
diff --git a/python-package/xgboost/libpath.py b/python-package/xgboost/libpath.py
@@ -8,7 +8,6 @@
 
 class XGBoostLibraryNotFound(Exception):
     """Error thrown by when xgboost is not found"""
-    pass
 
 
 def find_lib_path():
@@ -8,7 +8,6 @@
 
 class XGBoostLibraryNotFound(Exception):
     """Error thrown by when xgboost is not found"""
-    pass
 
 
 def find_lib_path():