From 4394d9f2fa91449c013de0b013d343644792fe4c Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 26 Feb 2020 01:53:58 +0800 Subject: [PATCH] Define lazy isinstance for Python compat. * Avoid importing datatable. * Fix #5363. --- python-package/xgboost/compat.py | 29 ++++++++--------------------- python-package/xgboost/core.py | 11 ++++++----- tests/python/test_basic.py | 5 +++++ tests/python/testing.py | 6 ++++-- 4 files changed, 23 insertions(+), 28 deletions(-) diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index bae283de5c09..23c36a4d5a41 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -79,6 +79,14 @@ def os_fspath(path): # END NUMPY PATHLIB ATTRIBUTION ############################################################################### + +def lazy_isinstance(instance, module, name): + '''Use string representation to identify a type.''' + module = type(instance).__module__ == module + name = type(instance).__name__ == name + return module and name + + # pandas try: from pandas import DataFrame, Series @@ -95,27 +103,6 @@ def os_fspath(path): pandas_concat = None PANDAS_INSTALLED = False -# dt -try: - # Workaround for #4473, compatibility with dask - if sys.__stdin__ is not None and sys.__stdin__.closed: - sys.__stdin__ = None - import datatable - - if hasattr(datatable, "Frame"): - DataTable = datatable.Frame - else: - DataTable = datatable.DataTable - DT_INSTALLED = True -except ImportError: - - # pylint: disable=too-few-public-methods - class DataTable(object): - """ dummy for datatable.DataTable """ - - DT_INSTALLED = False - - # cudf try: from cudf import DataFrame as CUDF_DataFrame diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index f134c0399999..2fba6ad93577 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -19,9 +19,9 @@ from .compat import ( STRING_TYPES, DataFrame, MultiIndex, Int64Index, py_str, - PANDAS_INSTALLED, DataTable, - CUDF_INSTALLED, CUDF_DataFrame, CUDF_Series, CUDF_MultiIndex, - os_fspath, os_PathLike) + PANDAS_INSTALLED, CUDF_INSTALLED, + CUDF_DataFrame, CUDF_Series, CUDF_MultiIndex, + os_fspath, os_PathLike, lazy_isinstance) from .libpath import find_lib_path # c_bst_ulong corresponds to bst_ulong defined in xgboost/c_api.h @@ -319,7 +319,8 @@ def _maybe_cudf_dataframe(data, feature_names, feature_types): def _maybe_dt_data(data, feature_names, feature_types, meta=None, meta_type=None): """Validate feature names and types if data table""" - if not isinstance(data, DataTable): + if (not lazy_isinstance(data, 'datatable', 'Frame') and + not lazy_isinstance(data, 'datatable', 'DataTable')): return data, feature_names, feature_types if meta and data.shape[1] > 1: @@ -470,7 +471,7 @@ def __init__(self, data, label=None, weight=None, base_margin=None, self._init_from_csc(data) elif isinstance(data, np.ndarray): self._init_from_npy2d(data, missing, nthread) - elif isinstance(data, DataTable): + elif lazy_isinstance(data, 'datatable', 'Frame'): self._init_from_dt(data, nthread) elif hasattr(data, "__cuda_array_interface__"): self._init_from_array_interface(data, missing, nthread) diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py index a38f9a26cac6..e7d15cafec13 100644 --- a/tests/python/test_basic.py +++ b/tests/python/test_basic.py @@ -36,6 +36,11 @@ def captured_output(): class TestBasic(unittest.TestCase): + def test_compat(self): + from xgboost.compat import lazy_isinstance + a = np.array([1, 2, 3]) + assert lazy_isinstance(a, 'numpy', 'ndarray') + assert not lazy_isinstance(a, 'numpy', 'dataframe') def test_basic(self): dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') diff --git a/tests/python/testing.py b/tests/python/testing.py index d77c2c4ad38f..708e5af4ca55 100644 --- a/tests/python/testing.py +++ b/tests/python/testing.py @@ -1,5 +1,5 @@ # coding: utf-8 -from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED, DT_INSTALLED +from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED from xgboost.compat import CUDF_INSTALLED, DASK_INSTALLED @@ -19,7 +19,9 @@ def no_pandas(): def no_dt(): - return {'condition': not DT_INSTALLED, + import importlib.util + spec = importlib.util.find_spec('datatable') + return {'condition': spec is None, 'reason': 'Datatable is not installed.'}