[python] use better names for imported classes from extra libraries #3862

Merged · 1 commit · Jan 26, 2021
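This PR renames classes imported from optional dependencies so each carries a library prefix: pandas' `DataFrame`/`Series` become `pd_DataFrame`/`pd_Series`, and datatable's frame class becomes `dt_DataTable`. The prefix makes the origin of each symbol obvious at call sites and avoids collisions between libraries that all expose a "DataFrame"-like class. A minimal sketch of the pattern, using the same names the diff below introduces (module layout simplified relative to the real `compat.py`):

```python
# Minimal sketch of the aliased optional-import pattern applied in this PR.
try:
    from pandas import DataFrame as pd_DataFrame  # prefixed alias
    PANDAS_INSTALLED = True
except ImportError:
    PANDAS_INSTALLED = False

    class pd_DataFrame:  # dummy stand-in, never instantiated by user data
        """Dummy class for pandas.DataFrame."""

        pass
```

Because the dummy fallback reuses the same name, `isinstance(x, pd_DataFrame)` stays valid whether or not pandas is installed.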
54 changes: 27 additions & 27 deletions python-package/lightgbm/basic.py
@@ -14,7 +14,7 @@
import numpy as np
import scipy.sparse

-from .compat import PANDAS_INSTALLED, DataFrame, Series, concat, is_dtype_sparse, DataTable
+from .compat import PANDAS_INSTALLED, pd_DataFrame, pd_Series, concat, is_dtype_sparse, dt_DataTable
from .libpath import find_lib_path


@@ -140,7 +140,7 @@ def list_to_1d_numpy(data, dtype=np.float32, name='list'):
return data.astype(dtype=dtype, copy=False)
elif is_1d_list(data):
return np.array(data, dtype=dtype, copy=False)
-elif isinstance(data, Series):
+elif isinstance(data, pd_Series):
if _get_bad_pandas_dtypes([data.dtypes]):
raise ValueError('Series.dtypes must be int, float or bool')
return np.array(data, dtype=dtype, copy=False) # SparseArray should be supported as well
@@ -493,7 +493,7 @@ def _get_bad_pandas_dtypes(dtypes):


def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorical):
-if isinstance(data, DataFrame):
+if isinstance(data, pd_DataFrame):
if len(data.shape) != 2 or data.shape[0] < 1:
raise ValueError('Input data must be 2 dimensional and non empty.')
if feature_name == 'auto' or feature_name is None:
@@ -537,7 +537,7 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorical):


def _label_from_pandas(label):
-if isinstance(label, DataFrame):
+if isinstance(label, pd_DataFrame):
if len(label.columns) > 1:
raise ValueError('DataFrame for label cannot have multiple columns')
if _get_bad_pandas_dtypes(label.dtypes):
@@ -720,7 +720,7 @@ def predict(self, data, start_iteration=0, num_iteration=-1,
except BaseException:
raise ValueError('Cannot convert data list to numpy array.')
preds, nrow = self.__pred_for_np2d(data, start_iteration, num_iteration, predict_type)
-elif isinstance(data, DataTable):
+elif isinstance(data, dt_DataTable):
preds, nrow = self.__pred_for_np2d(data.to_numpy(), start_iteration, num_iteration, predict_type)
else:
try:
@@ -1258,7 +1258,7 @@ def _lazy_init(self, data, label=None, reference=None,
self.__init_from_np2d(data, params_str, ref_dataset)
elif isinstance(data, list) and len(data) > 0 and all(isinstance(x, np.ndarray) for x in data):
self.__init_from_list_np2d(data, params_str, ref_dataset)
-elif isinstance(data, DataTable):
+elif isinstance(data, dt_DataTable):
self.__init_from_np2d(data.to_numpy(), params_str, ref_dataset)
else:
try:
@@ -1939,9 +1939,9 @@ def get_data(self):
if self.data is not None:
if isinstance(self.data, np.ndarray) or scipy.sparse.issparse(self.data):
self.data = self.data[self.used_indices, :]
-elif isinstance(self.data, DataFrame):
+elif isinstance(self.data, pd_DataFrame):
self.data = self.data.iloc[self.used_indices].copy()
-elif isinstance(self.data, DataTable):
+elif isinstance(self.data, dt_DataTable):
self.data = self.data[self.used_indices, :]
else:
_log_warning("Cannot subset {} type of raw data.\n"
@@ -2061,49 +2061,49 @@ def add_features_from(self, other):
self.data = np.hstack((self.data, other.data))
elif scipy.sparse.issparse(other.data):
self.data = np.hstack((self.data, other.data.toarray()))
-elif isinstance(other.data, DataFrame):
+elif isinstance(other.data, pd_DataFrame):
self.data = np.hstack((self.data, other.data.values))
-elif isinstance(other.data, DataTable):
+elif isinstance(other.data, dt_DataTable):
self.data = np.hstack((self.data, other.data.to_numpy()))
else:
self.data = None
elif scipy.sparse.issparse(self.data):
sparse_format = self.data.getformat()
if isinstance(other.data, np.ndarray) or scipy.sparse.issparse(other.data):
self.data = scipy.sparse.hstack((self.data, other.data), format=sparse_format)
-elif isinstance(other.data, DataFrame):
+elif isinstance(other.data, pd_DataFrame):
self.data = scipy.sparse.hstack((self.data, other.data.values), format=sparse_format)
-elif isinstance(other.data, DataTable):
+elif isinstance(other.data, dt_DataTable):
self.data = scipy.sparse.hstack((self.data, other.data.to_numpy()), format=sparse_format)
else:
self.data = None
-elif isinstance(self.data, DataFrame):
+elif isinstance(self.data, pd_DataFrame):
if not PANDAS_INSTALLED:
raise LightGBMError("Cannot add features to DataFrame type of raw data "
"without pandas installed")
if isinstance(other.data, np.ndarray):
-self.data = concat((self.data, DataFrame(other.data)),
+self.data = concat((self.data, pd_DataFrame(other.data)),
axis=1, ignore_index=True)
elif scipy.sparse.issparse(other.data):
-self.data = concat((self.data, DataFrame(other.data.toarray())),
+self.data = concat((self.data, pd_DataFrame(other.data.toarray())),
axis=1, ignore_index=True)
-elif isinstance(other.data, DataFrame):
+elif isinstance(other.data, pd_DataFrame):
self.data = concat((self.data, other.data),
axis=1, ignore_index=True)
-elif isinstance(other.data, DataTable):
-self.data = concat((self.data, DataFrame(other.data.to_numpy())),
+elif isinstance(other.data, dt_DataTable):
+self.data = concat((self.data, pd_DataFrame(other.data.to_numpy())),
axis=1, ignore_index=True)
else:
self.data = None
-elif isinstance(self.data, DataTable):
+elif isinstance(self.data, dt_DataTable):
if isinstance(other.data, np.ndarray):
-self.data = DataTable(np.hstack((self.data.to_numpy(), other.data)))
+self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data)))
elif scipy.sparse.issparse(other.data):
-self.data = DataTable(np.hstack((self.data.to_numpy(), other.data.toarray())))
-elif isinstance(other.data, DataFrame):
-self.data = DataTable(np.hstack((self.data.to_numpy(), other.data.values)))
-elif isinstance(other.data, DataTable):
-self.data = DataTable(np.hstack((self.data.to_numpy(), other.data.to_numpy())))
+self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.toarray())))
+elif isinstance(other.data, pd_DataFrame):
+self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.values)))
+elif isinstance(other.data, dt_DataTable):
+self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.to_numpy())))
else:
self.data = None
else:
@@ -2496,7 +2496,7 @@ def tree_dict_to_node_list(tree, node_depth=1, tree_index=None,
tree_index=tree['tree_index'],
feature_names=feature_names))

-return DataFrame(model_list, columns=model_list[0].keys())
+return pd_DataFrame(model_list, columns=model_list[0].keys())

def set_train_data_name(self, name):
"""Set the name to the training Dataset.
@@ -3345,7 +3345,7 @@ def add(root):
ret = np.column_stack((bin_edges[1:], hist))
ret = ret[ret[:, 1] > 0]
if PANDAS_INSTALLED:
-return DataFrame(ret, columns=['SplitValue', 'Count'])
+return pd_DataFrame(ret, columns=['SplitValue', 'Count'])
else:
return ret
else:
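One detail worth noting in the `add_features_from` hunk above, independent of the renames: when the existing data is sparse, the code captures its storage format with `getformat()` and passes it to `scipy.sparse.hstack` so the combined matrix keeps the same layout. A small self-contained sketch of that idiom:

```python
import numpy as np
import scipy.sparse

# Preserve the left matrix's storage format (csr/csc) across the hstack,
# as add_features_from does when self.data is sparse.
left = scipy.sparse.csr_matrix(np.eye(3))
right = np.arange(3).reshape(3, 1)  # dense columns are accepted too

sparse_format = left.getformat()  # 'csr'
combined = scipy.sparse.hstack((left, right), format=sparse_format)
assert combined.getformat() == sparse_format
```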
18 changes: 10 additions & 8 deletions python-package/lightgbm/compat.py
@@ -3,18 +3,20 @@

"""pandas"""
try:
-from pandas import Series, DataFrame, concat
+from pandas import concat
+from pandas import Series as pd_Series
+from pandas import DataFrame as pd_DataFrame
from pandas.api.types import is_sparse as is_dtype_sparse
PANDAS_INSTALLED = True
except ImportError:
PANDAS_INSTALLED = False

-class Series:
+class pd_Series:
"""Dummy class for pandas.Series."""

pass

-class DataFrame:
+class pd_DataFrame:
"""Dummy class for pandas.DataFrame."""

pass
@@ -40,15 +42,15 @@ class DataFrame:
try:
import datatable
if hasattr(datatable, "Frame"):
-DataTable = datatable.Frame
+dt_DataTable = datatable.Frame
else:
-DataTable = datatable.DataTable
+dt_DataTable = datatable.DataTable
DATATABLE_INSTALLED = True
except ImportError:
DATATABLE_INSTALLED = False

-class DataTable:
-"""Dummy class for DataTable."""
+class dt_DataTable:
+"""Dummy class for datatable.DataTable."""

pass

@@ -128,6 +130,6 @@ class dask_Array:
pass

class dask_Frame:
"""Dummy class for ddask.dataframe._Frame."""
"""Dummy class for dask.dataframe._Frame."""

pass
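The dummy fallback classes above are what make the renamed imports safe to use in type checks when the optional library is absent: nothing ever instantiates a dummy, so `isinstance` simply returns `False` and control falls through to another branch. A hedged illustration of that dispatch, mirroring the branches in basic.py (the `_to_numpy` helper itself is hypothetical, and the import assumes a lightgbm build that includes this PR):

```python
import numpy as np

from lightgbm.compat import pd_DataFrame, dt_DataTable

def _to_numpy(data):
    # Hypothetical helper: with pandas or datatable absent, the aliased
    # names are dummy classes that match no real object, so we fall
    # through to the plain-numpy branch.
    if isinstance(data, pd_DataFrame):
        return data.values
    if isinstance(data, dt_DataTable):
        return data.to_numpy()
    return np.asarray(data)
```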
14 changes: 7 additions & 7 deletions python-package/lightgbm/dask.py
@@ -16,7 +16,7 @@
import scipy.sparse as ss

from .basic import _choose_param_value, _ConfigAliases, _LIB, _log_warning, _safe_call, LightGBMError
-from .compat import (PANDAS_INSTALLED, DataFrame, Series, concat,
+from .compat import (PANDAS_INSTALLED, pd_DataFrame, pd_Series, concat,
SKLEARN_INSTALLED,
DASK_INSTALLED, dask_Frame, dask_Array, delayed, Client, default_client, get_worker, wait)
from .sklearn import LGBMClassifier, LGBMRegressor, LGBMRanker
@@ -105,7 +105,7 @@ def _find_ports_for_workers(client: Client, worker_addresses: Iterable[str], loc
def _concat(seq):
if isinstance(seq[0], np.ndarray):
return np.concatenate(seq, axis=0)
-elif isinstance(seq[0], (DataFrame, Series)):
+elif isinstance(seq[0], (pd_DataFrame, pd_Series)):
return concat(seq, axis=0)
elif isinstance(seq[0], ss.spmatrix):
return ss.vstack(seq, format='csr')
@@ -304,7 +304,7 @@ def _train(client, data, label, params, model_factory, sample_weight=None, group


def _predict_part(part, model, raw_score, pred_proba, pred_leaf, pred_contrib, **kwargs):
-data = part.values if isinstance(part, DataFrame) else part
+data = part.values if isinstance(part, pd_DataFrame) else part

if data.shape[0] == 0:
result = np.array([])
@@ -325,11 +325,11 @@ def _predict_part(part, model, raw_score, pred_proba, pred_leaf, pred_contrib, *
**kwargs
)

-if isinstance(part, DataFrame):
+if isinstance(part, pd_DataFrame):
if pred_proba or pred_contrib:
-result = DataFrame(result, index=part.index)
+result = pd_DataFrame(result, index=part.index)
else:
-result = Series(result, index=part.index, name='predictions')
+result = pd_Series(result, index=part.index, name='predictions')

return result

@@ -361,7 +361,7 @@ def _predict(model, data, raw_score=False, pred_proba=False, pred_leaf=False, pr
-------
predicted_result : dask array of shape = [n_samples] or shape = [n_samples, n_classes]
The predicted values.
-X_leaves : dask arrayof shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]
+X_leaves : dask array of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]
If ``pred_leaf=True``, the predicted leaf of every tree for each sample.
X_SHAP_values : dask array of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or list with n_classes length of such objects
If ``pred_contrib=True``, the feature contributions for each sample.
6 changes: 3 additions & 3 deletions python-package/lightgbm/sklearn.py
@@ -11,7 +11,7 @@
LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
_LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckSampleWeight,
_LGBMAssertAllFinite, _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight,
-DataFrame, DataTable)
+pd_DataFrame, dt_DataTable)
from .engine import train


@@ -560,7 +560,7 @@ def fit(self, X, y,
params['metric'] = [e for e in eval_metrics_builtin if e not in original_metric] + original_metric
params['metric'] = [metric for metric in params['metric'] if metric is not None]

-if not isinstance(X, (DataFrame, DataTable)):
+if not isinstance(X, (pd_DataFrame, dt_DataTable)):
_X, _y = _LGBMCheckXY(X, y, accept_sparse=True, force_all_finite=False, ensure_min_samples=2)
if sample_weight is not None:
sample_weight = _LGBMCheckSampleWeight(sample_weight, _X)
@@ -695,7 +695,7 @@ def predict(self, X, raw_score=False, start_iteration=0, num_iteration=None,
"""
if self._n_features is None:
raise LGBMNotFittedError("Estimator not fitted, call `fit` before exploiting the model.")
-if not isinstance(X, (DataFrame, DataTable)):
+if not isinstance(X, (pd_DataFrame, dt_DataTable)):
X = _LGBMCheckArray(X, accept_sparse=True, force_all_finite=False)
n_features = X.shape[1]
if self._n_features != n_features:
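For context on the sklearn.py hunks: the guards in `fit` and `predict` route only plain arrays and sparse matrices through scikit-learn's input validation, while `pd_DataFrame` and `dt_DataTable` inputs are passed straight to LightGBM so its own pandas/datatable handling (column names, categoricals) is preserved. A hedged sketch of that guard, with `check_array` standing in for the wrapped scikit-learn validator (`_LGBMCheckArray` in the real code):

```python
from lightgbm.compat import pd_DataFrame, dt_DataTable

def _maybe_validate(X, check_array):
    # Sketch of the guard in LGBMModel.predict(): DataFrame and DataTable
    # inputs skip scikit-learn validation and are handed to LightGBM as-is.
    if not isinstance(X, (pd_DataFrame, dt_DataTable)):
        X = check_array(X, accept_sparse=True, force_all_finite=False)
    return X
```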