Skip to content

Commit

Permalink
Migrate lint to Python 3
Browse files (browse the repository at this point in the history)
  • Loading branch information
hcho3 committed Apr 21, 2019
1 parent 711397d commit 6ffa667
Show file tree
Hide file tree
Showing 11 changed files with 99 additions and 111 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ addons:
packages:
- clang
- clang-tidy-5.0
- python3
- python3-pip
- cmake-data
- doxygen
- wget
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,10 @@ xgboost: $(CLI_OBJ) $(ALL_DEP)
$(CXX) $(CFLAGS) -o $@ $(filter %.o %.a, $^) $(LDFLAGS)

rcpplint:
python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src
python3 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src

lint: rcpplint
python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} include src plugin python-package
python3 dmlc-core/scripts/lint.py --pylint-rc ${PWD}/python-package/.pylintrc xgboost ${LINT_LANG} include src plugin python-package

pylint:
flake8 --ignore E501 python-package
Expand Down
2 changes: 1 addition & 1 deletion python-package/.pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ignore=tests

extension-pkg-whitelist=numpy

disiable=unexpected-special-method-signature,too-many-nested-blocks
disable=unexpected-special-method-signature,too-many-nested-blocks,useless-object-inheritance

dummy-variables-rgx=(unused|)_.*

Expand Down
20 changes: 9 additions & 11 deletions python-package/xgboost/callback.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# coding: utf-8
# pylint: disable= invalid-name
# pylint: disable=invalid-name, too-many-statements
"""Training Library containing training routines."""
from __future__ import absolute_import

Expand All @@ -20,13 +20,11 @@ def _fmt_metric(value, show_stdv=True):
"""format metric string"""
if len(value) == 2:
return '%s:%g' % (value[0], value[1])
elif len(value) == 3:
if len(value) == 3:
if show_stdv:
return '%s:%g+%g' % (value[0], value[1], value[2])
else:
return '%s:%g' % (value[0], value[1])
else:
raise ValueError("wrong metric value")
return '%s:%g' % (value[0], value[1])
raise ValueError("wrong metric value")


def print_evaluation(period=1, show_stdv=True):
Expand All @@ -50,7 +48,7 @@ def print_evaluation(period=1, show_stdv=True):
"""
def callback(env):
"""internal function"""
if env.rank != 0 or len(env.evaluation_result_list) == 0 or period is False or period == 0:
if env.rank != 0 or (not env.evaluation_result_list) or period is False or period == 0:
return
i = env.iteration
if (i % period == 0 or i + 1 == env.begin_iteration or i + 1 == env.end_iteration):
Expand Down Expand Up @@ -89,7 +87,7 @@ def init(env):

def callback(env):
"""internal function"""
if len(eval_result) == 0:
if not eval_result:
init(env)
for k, v in env.evaluation_result_list:
pos = k.index('-')
Expand Down Expand Up @@ -182,14 +180,14 @@ def init(env):
"""internal function"""
bst = env.model

if len(env.evaluation_result_list) == 0:
if not env.evaluation_result_list:
raise ValueError('For early stopping you need at least one set in evals.')
if len(env.evaluation_result_list) > 1 and verbose:
msg = ("Multiple eval metrics have been passed: "
"'{0}' will be used for early stopping.\n\n")
rabit.tracker_print(msg.format(env.evaluation_result_list[-1][0]))
maximize_metrics = ('auc', 'aucpr', 'map', 'ndcg')
maximize_at_n_metrics = ('auc@', 'aucpr@' 'map@', 'ndcg@')
maximize_at_n_metrics = ('auc@', 'aucpr@', 'map@', 'ndcg@')
maximize_score = maximize
metric_label = env.evaluation_result_list[-1][0]
metric = metric_label.split('-', 1)[-1]
Expand Down Expand Up @@ -225,7 +223,7 @@ def init(env):
def callback(env):
"""internal function"""
score = env.evaluation_result_list[-1][1]
if len(state) == 0:
if not state:
init(env)
best_score = state['best_score']
best_iteration = state['best_iteration']
Expand Down
11 changes: 5 additions & 6 deletions python-package/xgboost/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@

if PY3:
# pylint: disable=invalid-name, redefined-builtin
STRING_TYPES = str,
STRING_TYPES = (str,)

def py_str(x):
"""convert c string back to python string"""
return x.decode('utf-8')
else:
# pylint: disable=invalid-name
STRING_TYPES = basestring,
STRING_TYPES = (basestring,) # pylint: disable=undefined-variable

def py_str(x):
"""convert c string back to python string"""
Expand All @@ -37,13 +36,13 @@ def py_str(x):
PANDAS_INSTALLED = True
except ImportError:

# pylint: disable=too-few-public-methods
class MultiIndex(object):
""" dummy for pandas.MultiIndex """
pass

# pylint: disable=too-few-public-methods
class DataFrame(object):
""" dummy for pandas.DataFrame """
pass

PANDAS_INSTALLED = False

Expand All @@ -57,9 +56,9 @@ class DataFrame(object):
DT_INSTALLED = True
except ImportError:

# pylint: disable=too-few-public-methods
class DataTable(object):
""" dummy for datatable.DataTable """
pass

DT_INSTALLED = False

Expand Down
125 changes: 58 additions & 67 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# coding: utf-8
# pylint: disable=too-many-arguments, too-many-branches, invalid-name
# pylint: disable=too-many-branches, too-many-lines, W0141
# pylint: disable=too-many-branches, too-many-lines, too-many-locals
"""Core XGBoost Library."""
from __future__ import absolute_import
import collections
Expand Down Expand Up @@ -30,7 +30,6 @@

class XGBoostError(Exception):
"""Error thrown by xgboost trainer."""
pass


class EarlyStopException(Exception):
Expand Down Expand Up @@ -67,18 +66,16 @@ def from_pystr_to_cstr(data):
list of str
"""

if isinstance(data, list):
pointers = (ctypes.c_char_p * len(data))()
if PY3:
data = [bytes(d, 'utf-8') for d in data]
else:
data = [d.encode('utf-8') if isinstance(d, unicode) else d
for d in data]
pointers[:] = data
return pointers
else:
# copy from above when we actually use it
if not isinstance(data, list):
raise NotImplementedError
pointers = (ctypes.c_char_p * len(data))()
if PY3:
data = [bytes(d, 'utf-8') for d in data]
else:
data = [d.encode('utf-8') if isinstance(d, unicode) else d # pylint: disable=undefined-variable
for d in data]
pointers[:] = data
return pointers


def from_cstr_to_pystr(data, length):
Expand All @@ -104,6 +101,7 @@ def from_cstr_to_pystr(data, length):
try:
res.append(str(data[i].decode('ascii')))
except UnicodeDecodeError:
# pylint: disable=undefined-variable
res.append(unicode(data[i].decode('utf-8')))
return res

Expand All @@ -123,7 +121,7 @@ def _get_log_callback_func():
def _load_lib():
"""Load xgboost Library."""
lib_paths = find_lib_path()
if len(lib_paths) == 0:
if not lib_paths:
return None
try:
pathBackup = os.environ['PATH'].split(os.pathsep)
Expand Down Expand Up @@ -267,8 +265,7 @@ def _maybe_pandas_label(label):
label_dtypes = label.dtypes
if not all(dtype.name in PANDAS_DTYPE_MAPPER for dtype in label_dtypes):
raise ValueError('DataFrame.dtypes for label must be int, float or bool')
else:
label = label.values.astype('float')
label = label.values.astype('float')
# pd.Series can be passed to xgb as it is

return label
Expand Down Expand Up @@ -301,8 +298,7 @@ def _maybe_dt_data(data, feature_names, feature_types):
# always return stypes for dt ingestion
if feature_types is not None:
raise ValueError('DataTable has own feature types, cannot pass them in')
else:
feature_types = np.vectorize(DT_TYPE_MAPPER2.get)(data_types_names)
feature_types = np.vectorize(DT_TYPE_MAPPER2.get)(data_types_names)

return data, feature_names, feature_types

Expand Down Expand Up @@ -512,7 +508,7 @@ def _init_from_dt(self, data, nthread):
ptrs[icol] = ctypes.c_void_p(ptr)
else:
# datatable<=0.8.0
from datatable.internal import frame_column_data_r
from datatable.internal import frame_column_data_r # pylint: disable=no-name-in-module,import-error
for icol in range(data.ncols):
ptrs[icol] = frame_column_data_r(data, icol)

Expand Down Expand Up @@ -1039,8 +1035,7 @@ def attr(self, key):
self.handle, c_str(key), ctypes.byref(ret), ctypes.byref(success)))
if success.value != 0:
return py_str(ret.value)
else:
return None
return None

def attributes(self):
"""Get attributes stored in the Booster as a dictionary.
Expand All @@ -1056,8 +1051,7 @@ def attributes(self):
ctypes.byref(length),
ctypes.byref(sarr)))
attr_names = from_cstr_to_pystr(sarr, length)
res = dict([(n, self.attr(n)) for n in attr_names])
return res
return {n: self.attr(n) for n in attr_names}

def set_attr(self, **kwargs):
"""Set the attribute of the Booster.
Expand Down Expand Up @@ -1399,13 +1393,13 @@ def dump_model(self, fout, fmap='', with_stats=False, dump_format="text"):
ret = self.get_dump(fmap, with_stats, dump_format)
if dump_format == 'json':
fout.write('[\n')
for i in range(len(ret)):
for i, _ in enumerate(ret):
fout.write(ret[i])
if i < len(ret) - 1:
fout.write(",\n")
fout.write('\n]')
else:
for i in range(len(ret)):
for i, _ in enumerate(ret):
fout.write('booster[{}]:\n'.format(i))
fout.write(ret[i])
if need_close:
Expand Down Expand Up @@ -1538,51 +1532,50 @@ def get_score(self, fmap='', importance_type='weight'):

return fmap

else:
average_over_splits = True
if importance_type == 'total_gain':
importance_type = 'gain'
average_over_splits = False
elif importance_type == 'total_cover':
importance_type = 'cover'
average_over_splits = False
average_over_splits = True
if importance_type == 'total_gain':
importance_type = 'gain'
average_over_splits = False
elif importance_type == 'total_cover':
importance_type = 'cover'
average_over_splits = False

trees = self.get_dump(fmap, with_stats=True)
trees = self.get_dump(fmap, with_stats=True)

importance_type += '='
fmap = {}
gmap = {}
for tree in trees:
for line in tree.split('\n'):
# look for the opening square bracket
arr = line.split('[')
# if no opening bracket (leaf node), ignore this line
if len(arr) == 1:
continue
importance_type += '='
fmap = {}
gmap = {}
for tree in trees:
for line in tree.split('\n'):
# look for the opening square bracket
arr = line.split('[')
# if no opening bracket (leaf node), ignore this line
if len(arr) == 1:
continue

# look for the closing bracket, extract only info within that bracket
fid = arr[1].split(']')
# look for the closing bracket, extract only info within that bracket
fid = arr[1].split(']')

# extract gain or cover from string after closing bracket
g = float(fid[1].split(importance_type)[1].split(',')[0])
# extract gain or cover from string after closing bracket
g = float(fid[1].split(importance_type)[1].split(',')[0])

# extract feature name from string before closing bracket
fid = fid[0].split('<')[0]
# extract feature name from string before closing bracket
fid = fid[0].split('<')[0]

if fid not in fmap:
# if the feature hasn't been seen yet
fmap[fid] = 1
gmap[fid] = g
else:
fmap[fid] += 1
gmap[fid] += g
if fid not in fmap:
# if the feature hasn't been seen yet
fmap[fid] = 1
gmap[fid] = g
else:
fmap[fid] += 1
gmap[fid] += g

# calculate average value (gain/cover) for each feature
if average_over_splits:
for fid in gmap:
gmap[fid] = gmap[fid] / fmap[fid]
# calculate average value (gain/cover) for each feature
if average_over_splits:
for fid in gmap:
gmap[fid] = gmap[fid] / fmap[fid]

return gmap
return gmap

def trees_to_dataframe(self, fmap=''):
"""Parse a boosted tree model text dump into a pandas DataFrame structure.
Expand Down Expand Up @@ -1721,7 +1714,7 @@ def get_split_value_histogram(self, feature, fmap='', bins=None, as_pandas=True)
xgdump = self.get_dump(fmap=fmap)
values = []
regexp = re.compile(r"\[{0}<([\d.Ee+-]+)\]".format(feature))
for i in range(len(xgdump)):
for i, _ in enumerate(xgdump):
m = re.findall(regexp, xgdump[i])
values.extend(map(float, m))

Expand All @@ -1734,9 +1727,7 @@ def get_split_value_histogram(self, feature, fmap='', bins=None, as_pandas=True)

if as_pandas and PANDAS_INSTALLED:
return DataFrame(nph, columns=['SplitValue', 'Count'])
elif as_pandas and not PANDAS_INSTALLED:
if as_pandas and not PANDAS_INSTALLED:
sys.stderr.write(
"Returning histogram as ndarray (as_pandas == True, but pandas is not installed).")
return nph
else:
return nph
return nph
1 change: 0 additions & 1 deletion python-package/xgboost/libpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

class XGBoostLibraryNotFound(Exception):
"""Error thrown when xgboost is not found"""
pass


def find_lib_path():
Expand Down
Loading

0 comments on commit 6ffa667

Please sign in to comment.