Skip to content

Commit

Permalink
Speed up python test (#5752)
Browse files Browse the repository at this point in the history
* Speed up tests

* Prevent DeviceQuantileDMatrix initialisation with numpy

* Use joblib.memory

* Use RandomState
  • Loading branch information
RAMitchell authored Jun 4, 2020
1 parent cfc23c6 commit 359023c
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 18 deletions.
4 changes: 0 additions & 4 deletions python-package/xgboost/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,10 +566,6 @@ def handle_input(self, data, feature_names, feature_types):

__device_quantile_dmatrix_registry.register_handler(
'cupy.core.core', 'ndarray', DeviceQuantileCudaArrayInterfaceHandler)
__device_quantile_dmatrix_registry.register_handler_opaque(
lambda x: hasattr(x, '__array__'), NumpyHandler)
__device_quantile_dmatrix_registry.register_handler_opaque(
lambda x: hasattr(x, '__cuda_array_interface__'), NumpyHandler)


class DeviceQuantileCudaColumnarHandler(DeviceQuantileDMatrixDataHandler,
Expand Down
22 changes: 22 additions & 0 deletions tests/python-gpu/test_device_quantile_dmatrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
import numpy as np
import xgboost as xgb
import unittest
import pytest
import sys

sys.path.append("tests/python")
import testing as tm


class TestDeviceQuantileDMatrix(unittest.TestCase):
    """Construction checks for ``xgb.DeviceQuantileDMatrix``."""

    def test_dmatrix_numpy_init(self):
        """Constructing from numpy arrays must raise an AssertionError."""
        features = np.random.randn(5, 5)
        expected_msg = 'is not supported for DeviceQuantileDMatrix'
        with pytest.raises(AssertionError, match=expected_msg):
            xgb.DeviceQuantileDMatrix(features, np.ones(5, dtype=np.float64))

    @pytest.mark.skipif(**tm.no_cupy())
    def test_dmatrix_cupy_init(self):
        """Constructing from cupy arrays should complete without raising."""
        # Imported lazily so the module can still be collected when the
        # skip condition supplied by tm.no_cupy() applies.
        import cupy as cp
        features = cp.random.randn(5, 5)
        labels = cp.ones(5, dtype=np.float64)
        xgb.DeviceQuantileDMatrix(features, labels)
9 changes: 4 additions & 5 deletions tests/python-gpu/test_gpu_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,19 @@
import unittest

sys.path.append('tests/python/')
import test_linear # noqa: E402
import testing as tm # noqa: E402
import test_linear # noqa: E402
import testing as tm # noqa: E402


class TestGPULinear(unittest.TestCase):

datasets = ["Boston", "Digits", "Cancer", "Sparse regression"]
common_param = {
'booster': ['gblinear'],
'updater': ['gpu_coord_descent'],
'eta': [0.5],
'top_k': [10],
'tolerance': [1e-5],
'alpha': [.005, .1],
'alpha': [.1],
'lambda': [0.005],
'coordinate_selection': ['cyclic', 'random', 'greedy']}

Expand All @@ -26,6 +25,6 @@ def test_gpu_coordinate(self):
parameters['gpu_id'] = [0]
for param in test_linear.parameter_combinations(parameters):
results = test_linear.run_suite(
param, 150, self.datasets, scale_features=True)
param, 100, self.datasets, scale_features=True)
test_linear.assert_regression_result(results, 1e-2)
test_linear.assert_classification_result(results)
1 change: 1 addition & 0 deletions tests/python-gpu/test_gpu_updaters.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def test_gpu_hist_device_dmatrix(self):
device_dmatrix_datasets = ["Boston", "Cancer", "Digits"]
for param in test_param:
param['tree_method'] = 'gpu_hist'

gpu_results_device_dmatrix = run_suite(param, select_datasets=device_dmatrix_datasets,
DMatrixT=xgb.DeviceQuantileDMatrix,
dmatrix_params={'max_bin': param['max_bin']})
Expand Down
35 changes: 26 additions & 9 deletions tests/python/regression_test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import os
import sys
import xgboost as xgb
from joblib import Memory
memory = Memory('./cachedir', verbose=0)

try:
from sklearn import datasets
Expand Down Expand Up @@ -39,27 +41,35 @@ def __repr__(self):
return self.__str__()


@memory.cache
def get_boston():
    """Return the Boston housing data as a ``(features, target)`` pair.

    The call goes through ``memory.cache``, so the loaded arrays are
    memoised by the module-level joblib ``Memory`` object.
    """
    return datasets.load_boston(return_X_y=True)


@memory.cache
def get_digits():
    """Return the digits data as a ``(features, target)`` pair.

    The call goes through ``memory.cache``, so the loaded arrays are
    memoised by the module-level joblib ``Memory`` object.
    """
    return datasets.load_digits(return_X_y=True)


@memory.cache
def get_cancer():
    """Return the breast-cancer data as a ``(features, target)`` pair.

    The call goes through ``memory.cache``, so the loaded arrays are
    memoised by the module-level joblib ``Memory`` object.
    """
    return datasets.load_breast_cancer(return_X_y=True)


@memory.cache
def get_sparse():
    """Build a deterministic sparse regression problem.

    Draws 2000 samples from ``datasets.make_regression`` using a
    ``RandomState`` seeded with 199, zeroes every entry of the feature
    matrix independently with probability 0.75, and converts the result
    to CSR format.

    Returns
    -------
    X : scipy.sparse.csr_matrix
        Feature matrix after masking.
    y : array
        Regression targets as returned by ``make_regression``.
    """
    from scipy import sparse

    rng = np.random.RandomState(199)
    n = 2000
    sparsity = 0.75
    X, y = datasets.make_regression(n, random_state=rng)
    # A single Bernoulli draw for the whole matrix, applied as a boolean
    # mask: same entries zeroed as an element-by-element loop over the
    # flags, without the Python-level double loop.
    drop = rng.binomial(1, sparsity, X.shape).astype(bool)
    X[drop] = 0.0
    return sparse.csr_matrix(X), y
Expand All @@ -73,14 +83,18 @@ def get_small_weights():
return get_weights_regression(1e-6, 1e-5)


@memory.cache
def get_weights_regression(min_weight, max_weight):
    """Build a deterministic weighted regression problem with missing values.

    Draws 2000 samples from ``datasets.make_regression`` using a
    ``RandomState`` seeded with 199, replaces every feature entry
    independently with NaN with probability 0.25, and draws one weight
    per sample uniformly from ``[min_weight, max_weight)``.

    Parameters
    ----------
    min_weight : float
        Lower bound passed to ``rng.uniform``.
    max_weight : float
        Upper bound passed to ``rng.uniform``.

    Returns
    -------
    X : array
        Feature matrix containing NaN at the masked positions.
    y : array
        Regression targets as returned by ``make_regression``.
    w : array
        Per-sample weights, length 2000.
    """
    rng = np.random.RandomState(199)
    n = 2000
    sparsity = 0.25
    X, y = datasets.make_regression(n, random_state=rng)
    # A single Bernoulli draw for the whole matrix, applied as a boolean
    # mask instead of visiting every (row, column) pair in Python.
    missing = rng.binomial(1, sparsity, X.shape).astype(bool)
    X[missing] = np.nan
    # One vectorised draw for all sample weights.
    w = rng.uniform(min_weight, max_weight, n)
    return X, y, w


Expand All @@ -101,10 +115,12 @@ def train_dataset(dataset, param_in, num_rounds=10, scale_features=False, DMatri
np.savetxt('tmptmp_1234.csv', np.hstack((dataset.y.reshape(len(dataset.y), 1), X)),
delimiter=',')
dtrain = DMatrixT('tmptmp_1234.csv?format=csv&label_column=0#tmptmp_',
weight=dataset.w)
weight=dataset.w)
elif DMatrixT is xgb.DeviceQuantileDMatrix:
import cupy as cp
dtrain = DMatrixT(cp.array(X), dataset.y, weight=dataset.w, **dmatrix_params)
dtrain = DMatrixT(cp.array(X), cp.array(dataset.y),
weight=None if dataset.w is None else cp.array(dataset.w),
**dmatrix_params)
else:
dtrain = DMatrixT(X, dataset.y, weight=dataset.w, **dmatrix_params)

Expand Down Expand Up @@ -146,7 +162,8 @@ def parameter_combinations(variable_param):
def run_suite(param, num_rounds=10, select_datasets=None, scale_features=False,
DMatrixT=xgb.DMatrix, dmatrix_params={}):
"""
Run the given parameters on a range of datasets. Objective and eval metric will be automatically set
Run the given parameters on a range of datasets. Objective and eval metric will be
automatically set
"""
datasets = [
Dataset("Boston", get_boston, "reg:squarederror", "rmse"),
Expand Down

0 comments on commit 359023c

Please sign in to comment.