Skip to content

Commit

Permalink
Define lazy isinstance for Python compat. (#5364)
Browse files Browse the repository at this point in the history
* Avoid importing datatable.
* Fix #5363.
  • Loading branch information
trivialfis authored Feb 26, 2020
1 parent 0fd455e commit a461a9a
Showing 4 changed files with 23 additions and 28 deletions.
29 changes: 8 additions & 21 deletions python-package/xgboost/compat.py
Original file line number Diff line number Diff line change
@@ -79,6 +79,14 @@ def os_fspath(path):
# END NUMPY PATHLIB ATTRIBUTION
###############################################################################


def lazy_isinstance(instance, module, name):
    """Identify a type by its module and class name, without importing it.

    Parameters
    ----------
    instance : object
        Object whose class is inspected.
    module : str
        Expected ``__module__`` of the object's class, e.g. ``'numpy'``.
    name : str
        Expected ``__name__`` of the object's class, e.g. ``'ndarray'``.

    Returns
    -------
    bool
        True only when both the module and the class name are equal to the
        given strings.  The comparison is exact: subclasses of the named type
        are not matched.
    """
    # Read ``__class__`` instead of calling ``type()`` so that object proxies
    # (for example ``weakref.proxy``) are identified by the class of the
    # object they stand for, as the builtin ``isinstance`` does.
    cls = instance.__class__
    return cls.__module__ == module and cls.__name__ == name


# pandas
try:
from pandas import DataFrame, Series
@@ -95,27 +103,6 @@ def os_fspath(path):
pandas_concat = None
PANDAS_INSTALLED = False

# dt
# Optional datatable support: bind the frame class to the name DataTable and
# record in DT_INSTALLED whether the import succeeded.
try:
# Workaround for #4473, compatibility with dask
# If the original stdin object exists but is already closed, replace it with
# None before datatable is imported.
if sys.__stdin__ is not None and sys.__stdin__.closed:
sys.__stdin__ = None
import datatable

# Use datatable.Frame when the module exposes it; otherwise fall back to
# datatable.DataTable.
if hasattr(datatable, "Frame"):
DataTable = datatable.Frame
else:
DataTable = datatable.DataTable
DT_INSTALLED = True
except ImportError:

# datatable could not be imported: define a placeholder class so the name
# DataTable is still defined, and flag the package as unavailable.
# pylint: disable=too-few-public-methods
class DataTable(object):
""" dummy for datatable.DataTable """

DT_INSTALLED = False


# cudf
try:
from cudf import DataFrame as CUDF_DataFrame
11 changes: 6 additions & 5 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
@@ -19,9 +19,9 @@

from .compat import (
STRING_TYPES, DataFrame, MultiIndex, Int64Index, py_str,
PANDAS_INSTALLED, DataTable,
CUDF_INSTALLED, CUDF_DataFrame, CUDF_Series, CUDF_MultiIndex,
os_fspath, os_PathLike)
PANDAS_INSTALLED, CUDF_INSTALLED,
CUDF_DataFrame, CUDF_Series, CUDF_MultiIndex,
os_fspath, os_PathLike, lazy_isinstance)
from .libpath import find_lib_path

# c_bst_ulong corresponds to bst_ulong defined in xgboost/c_api.h
@@ -319,7 +319,8 @@ def _maybe_cudf_dataframe(data, feature_names, feature_types):
def _maybe_dt_data(data, feature_names, feature_types,
meta=None, meta_type=None):
"""Validate feature names and types if data table"""
if not isinstance(data, DataTable):
if (not lazy_isinstance(data, 'datatable', 'Frame') and
not lazy_isinstance(data, 'datatable', 'DataTable')):
return data, feature_names, feature_types

if meta and data.shape[1] > 1:
@@ -470,7 +471,7 @@ def __init__(self, data, label=None, weight=None, base_margin=None,
self._init_from_csc(data)
elif isinstance(data, np.ndarray):
self._init_from_npy2d(data, missing, nthread)
elif isinstance(data, DataTable):
elif lazy_isinstance(data, 'datatable', 'Frame'):
self._init_from_dt(data, nthread)
elif hasattr(data, "__cuda_array_interface__"):
self._init_from_array_interface(data, missing, nthread)
5 changes: 5 additions & 0 deletions tests/python/test_basic.py
Original file line number Diff line number Diff line change
@@ -36,6 +36,11 @@ def captured_output():


class TestBasic(unittest.TestCase):
def test_compat(self):
    """Check lazy_isinstance on a numpy array.

    The array's real class name must match; a wrong class name under the
    same module must not.
    """
    from xgboost.compat import lazy_isinstance

    array = np.array([1, 2, 3])

    matches_ndarray = lazy_isinstance(array, 'numpy', 'ndarray')
    assert matches_ndarray

    matches_dataframe = lazy_isinstance(array, 'numpy', 'dataframe')
    assert not matches_dataframe

def test_basic(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
6 changes: 4 additions & 2 deletions tests/python/testing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# coding: utf-8
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED, DT_INSTALLED
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
from xgboost.compat import CUDF_INSTALLED, DASK_INSTALLED


@@ -19,7 +19,9 @@ def no_pandas():


def no_dt():
return {'condition': not DT_INSTALLED,
import importlib.util
spec = importlib.util.find_spec('datatable')
return {'condition': spec is None,
'reason': 'Datatable is not installed.'}


0 comments on commit a461a9a

Please sign in to comment.