Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for dlpack, expose python docs for DeviceQuantileDMatrix #5465

Merged
merged 1 commit into from
Apr 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/python/python_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ Core Data Structure
:members:
:show-inheritance:

.. autoclass:: xgboost.DeviceQuantileDMatrix
:show-inheritance:

.. autoclass:: xgboost.Booster
:members:
:show-inheritance:
Expand Down
30 changes: 22 additions & 8 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,17 @@ def _maybe_dt_data(data, feature_names, feature_types,

return data, feature_names, feature_types

def _is_dlpack(x):
return 'PyCapsule' in str(type(x)) and "dltensor" in str(x)

def _maybe_dlpack_data(data, feature_names, feature_types):
    """Turn a DLPack capsule into a cupy array; pass anything else through.

    When ``_is_dlpack(data)`` is true the capsule is handed to
    ``cupy.fromDlpack`` (intended as a zero-copy conversion) and the result
    replaces ``data``.  ``feature_names`` and ``feature_types`` are returned
    untouched in either case.

    Returns
    -------
    tuple
        ``(data, feature_names, feature_types)``
    """
    if _is_dlpack(data):
        # cupy is imported lazily so it is only required for DLPack input.
        import cupy  # pylint: disable=E0401
        data = cupy.fromDlpack(data)
    return data, feature_names, feature_types


def _convert_dataframes(data, feature_names, feature_types,
meta=None, meta_type=None):
Expand All @@ -399,6 +410,9 @@ def _convert_dataframes(data, feature_names, feature_types,
data, feature_names, feature_types = _maybe_cudf_dataframe(
data, feature_names, feature_types)

data, feature_names, feature_types = _maybe_dlpack_data(
data, feature_names, feature_types)

return data, feature_names, feature_types


Expand Down Expand Up @@ -439,7 +453,7 @@ def __init__(self, data, label=None, weight=None, base_margin=None,
"""Parameters
----------
data : os.PathLike/string/numpy.array/scipy.sparse/pd.DataFrame/
dt.Frame/cudf.DataFrame/cupy.array
dt.Frame/cudf.DataFrame/cupy.array/dlpack
Data source of DMatrix.
When data is string or os.PathLike type, it represents the path
libsvm format txt file, csv file (by specifying uri parameter
Expand Down Expand Up @@ -1028,12 +1042,12 @@ def feature_types(self, feature_types):
class DeviceQuantileDMatrix(DMatrix):
"""Device memory Data Matrix used in XGBoost for training with tree_method='gpu_hist'. Do not
use this for test/validation tasks as some information may be lost in quantisation. This
DMatrix is primarily designed to save memory in training and avoids intermediate steps,
directly creating a compressed representation for training without allocating additional
memory. Implementation does not currently consider weights in quantisation process(unlike
DMatrix).
DMatrix is primarily designed to save memory in training from device memory inputs by
avoiding intermediate storage. Implementation does not currently consider weights in
quantisation process(unlike DMatrix). Set max_bin to control the number of bins during
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking we remove support for weight altogether for now.

quantisation.

You can construct DeviceDMatrix from cupy/cudf
You can construct DeviceQuantileDMatrix from cupy/cudf/dlpack.
"""

def __init__(self, data, label=None, weight=None, base_margin=None,
Expand All @@ -1044,8 +1058,8 @@ def __init__(self, data, label=None, weight=None, base_margin=None,
nthread=None, max_bin=256):
self.max_bin = max_bin
if not (hasattr(data, "__cuda_array_interface__") or (
CUDF_INSTALLED and isinstance(data, CUDF_DataFrame))):
raise ValueError('Only cupy/cudf currently supported for DeviceDMatrix')
CUDF_INSTALLED and isinstance(data, CUDF_DataFrame)) or _is_dlpack(data)):
raise ValueError('Only cupy/cudf/dlpack currently supported for DeviceQuantileDMatrix')

super().__init__(data, label=label, weight=weight, base_margin=base_margin,
missing=missing,
Expand Down
16 changes: 15 additions & 1 deletion tests/python-gpu/test_from_cupy.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _test_cupy_metainfo(DMatrixT):
assert np.array_equal(dmat.get_uint_info('group_ptr'), dmat_cupy.get_uint_info('group_ptr'))


class TestFromArrayInterface:
class TestFromCupy:
'''Tests for constructing DMatrix from data structure conforming Apache
Arrow specification.'''

Expand All @@ -122,3 +122,17 @@ def test_cupy_metainfo_simple_dmat(self):
@pytest.mark.skipif(**tm.no_cupy())
def test_cupy_metainfo_device_dmat(self):
_test_cupy_metainfo(xgb.DeviceQuantileDMatrix)

@pytest.mark.skipif(**tm.no_cupy())
def test_dlpack_simple_dmat(self):
    """DMatrix built from a DLPack capsule keeps the source array's shape."""
    import cupy as cp
    n_rows, n_cols = 100, 2
    X = cp.random.random((n_rows, n_cols))
    # Export the cupy array as a DLPack capsule and feed it to DMatrix.
    dmat = xgb.DMatrix(X.toDlpack())
    # Check the data actually arrived, not merely that construction ran.
    assert dmat.num_row() == n_rows
    assert dmat.num_col() == n_cols

@pytest.mark.skipif(**tm.no_cupy())
def test_dlpack_device_dmat(self):
    """DeviceQuantileDMatrix built from a DLPack capsule keeps the shape."""
    import cupy as cp
    n_rows, n_cols = 100, 2
    X = cp.random.random((n_rows, n_cols))
    # Export the cupy array as a DLPack capsule and feed it to the
    # device-side matrix type.
    dmat = xgb.DeviceQuantileDMatrix(X.toDlpack())
    # Check the data actually arrived, not merely that construction ran.
    assert dmat.num_row() == n_rows
    assert dmat.num_col() == n_cols