From 75a0025a3d76a4f69d67a50201ae68069925ca33 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 28 May 2020 03:23:36 +0800 Subject: [PATCH 1/4] [CI] Remove CUDA 9.0 from Windows CI. (#5674) * Remove CUDA 9.0 on Windows CI. * Require cuda10 tag, to differentiate Co-authored-by: Philip Hyunsu Cho --- Jenkinsfile-win64 | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile-win64 b/Jenkinsfile-win64 index 15dc345ed2ac..479a78cd94f3 100644 --- a/Jenkinsfile-win64 +++ b/Jenkinsfile-win64 @@ -28,7 +28,7 @@ pipeline { steps { script { parallel ([ - 'build-win64-cuda9.0': { BuildWin64() } + 'build-win64-cuda10.0': { BuildWin64() } ]) } milestone ordinal: 2 @@ -40,7 +40,6 @@ pipeline { script { parallel ([ 'test-win64-cpu': { TestWin64CPU() }, - 'test-win64-gpu-cuda9.0': { TestWin64GPU(cuda_target: 'cuda9') }, 'test-win64-gpu-cuda10.0': { TestWin64GPU(cuda_target: 'cuda10_0') }, 'test-win64-gpu-cuda10.1': { TestWin64GPU(cuda_target: 'cuda10_1') } ]) @@ -67,7 +66,7 @@ def checkoutSrcs() { } def BuildWin64() { - node('win64 && build') { + node('win64 && build && cuda10') { unstash name: 'srcs' echo "Building XGBoost for Windows AMD64 target..." 
bat "nvcc --version" From fdbb6ae856e37b881fb706eab8e8fe4d6cd4c024 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Wed, 27 May 2020 16:18:18 -0700 Subject: [PATCH 2/4] Require CUDA 10.0+ in CMake build (#5718) --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 10e3dbfc63de..b42f5775fb6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,6 +92,9 @@ if (USE_CUDA) message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}") enable_language(CUDA) + if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 10.0) + message(FATAL_ERROR "CUDA version must be at least 10.0!") + endif() set(GEN_CODE "") format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE) message(STATUS "CUDA GEN_CODE: ${GEN_CODE}") From 91c646392db01714a4f69c33170fb0857b95664d Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Wed, 27 May 2020 16:19:30 -0700 Subject: [PATCH 3/4] Require Python 3.6+; drop Python 3.5 from CI (#5715) --- Jenkinsfile | 1 - python-package/setup.py | 3 +-- python-package/xgboost/__init__.py | 6 ------ tests/ci_build/Dockerfile.cpu | 6 ------ tests/ci_build/test_python.sh | 8 +------- 5 files changed, 2 insertions(+), 22 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index dede061e9e51..744bba67913c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -305,7 +305,6 @@ def TestPythonCPU() { def docker_binary = "docker" sh """ ${dockerRun} ${container_type} ${docker_binary} tests/ci_build/test_python.sh cpu - ${dockerRun} ${container_type} ${docker_binary} tests/ci_build/test_python.sh cpu-py35 """ deleteDir() } diff --git a/python-package/setup.py b/python-package/setup.py index 0e135b22216a..857e78f5fbbf 100644 --- a/python-package/setup.py +++ b/python-package/setup.py @@ -309,11 +309,10 @@ def run(self): 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 
'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8'], - python_requires='>=3.5', + python_requires='>=3.6', url='https://github.com/dmlc/xgboost') clean_up() diff --git a/python-package/xgboost/__init__.py b/python-package/xgboost/__init__.py index f4724e9d91ec..c18030ec1d16 100644 --- a/python-package/xgboost/__init__.py +++ b/python-package/xgboost/__init__.py @@ -22,12 +22,6 @@ except ImportError: pass -if sys.version_info[:2] == (3, 5): - warnings.warn( - 'Python 3.5 support is deprecated; XGBoost will require Python 3.6+ in the near future. ' + - 'Consider upgrading to Python 3.6+.', - FutureWarning) - VERSION_FILE = os.path.join(os.path.dirname(__file__), 'VERSION') with open(VERSION_FILE) as f: __version__ = f.read().strip() diff --git a/tests/ci_build/Dockerfile.cpu b/tests/ci_build/Dockerfile.cpu index ad4464e3f4a4..7e18fd99006f 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/tests/ci_build/Dockerfile.cpu @@ -19,12 +19,6 @@ ENV PATH=/opt/python/bin:$PATH ENV GOSU_VERSION 1.10 -# Create new Conda environment with Python 3.5 -RUN conda create -n py35 python=3.5 && \ - source activate py35 && \ - pip install numpy pytest scipy scikit-learn pandas matplotlib wheel kubernetes urllib3 graphviz && \ - source deactivate - # Install Python packages in default env RUN \ pip install pyyaml cpplint pylint astroid sphinx numpy scipy pandas matplotlib sh \ diff --git a/tests/ci_build/test_python.sh b/tests/ci_build/test_python.sh index 78dc077c24f2..2730f51aafa3 100755 --- a/tests/ci_build/test_python.sh +++ b/tests/ci_build/test_python.sh @@ -65,14 +65,8 @@ case "$suite" in ./runtests.sh ;; - cpu-py35) - source activate py35 - install_xgboost - pytest -v -s --fulltrace tests/python - ;; - *) - echo "Usage: $0 {gpu|mgpu|cudf|cpu|cpu-py35}" + echo "Usage: $0 {gpu|mgpu|cudf|cpu}" exit 1 ;; esac From 35e2205256b1b236ba9b1eccc5d89aee5b2f5e6f Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 28 May 2020 17:51:20 +0800 Subject: [PATCH 4/4] 
[dask] Return GPU Series when input is from cuDF. (#5710) * Refactor predict function. --- python-package/xgboost/compat.py | 7 --- python-package/xgboost/core.py | 9 ++-- python-package/xgboost/dask.py | 62 +++++++++++++------------- tests/python-gpu/test_gpu_with_dask.py | 18 +++++++- tests/python/testing.py | 8 +++- 5 files changed, 58 insertions(+), 46 deletions(-) diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index 3064d3501b6b..281db3bcb3ed 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -105,15 +105,8 @@ def lazy_isinstance(instance, module, name): # cudf try: - from cudf import DataFrame as CUDF_DataFrame - from cudf import Series as CUDF_Series from cudf import concat as CUDF_concat - CUDF_INSTALLED = True except ImportError: - CUDF_DataFrame = object - CUDF_Series = object - CUDF_MultiIndex = object - CUDF_INSTALLED = False CUDF_concat = None # sklearn diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index d701309a9824..f25886c3c36f 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -17,8 +17,7 @@ from .compat import ( STRING_TYPES, DataFrame, py_str, - PANDAS_INSTALLED, CUDF_INSTALLED, - CUDF_DataFrame, + PANDAS_INSTALLED, os_fspath, os_PathLike, lazy_isinstance) from .libpath import find_lib_path @@ -282,8 +281,8 @@ def _convert_unknown_data(data, meta=None, meta_type=None): # Either object has cuda array interface or contains columns with interfaces def _has_cuda_array_interface(data): - return hasattr(data, '__cuda_array_interface__') or ( - CUDF_INSTALLED and isinstance(data, CUDF_DataFrame)) + return hasattr(data, '__cuda_array_interface__') or \ + lazy_isinstance(data, 'cudf.core.dataframe', 'DataFrame') def _cudf_array_interfaces(df): @@ -508,7 +507,7 @@ def set_uint_info(self, field, data): def set_interface_info(self, field, data): """Set info type property into DMatrix.""" # If we are passed a dataframe, 
extract the series - if CUDF_INSTALLED and isinstance(data, CUDF_DataFrame): + if lazy_isinstance(data, 'cudf.core.dataframe', 'DataFrame'): if len(data.columns) != 1: raise ValueError( 'Expecting meta-info to contain a single column') diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index 1cc15d744e82..5e7e8624fdbf 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -25,7 +25,7 @@ from .compat import da, dd, delayed, get_client from .compat import sparse, scipy_sparse from .compat import PANDAS_INSTALLED, DataFrame, Series, pandas_concat -from .compat import CUDF_INSTALLED, CUDF_DataFrame, CUDF_Series, CUDF_concat +from .compat import CUDF_concat from .compat import lazy_isinstance from .core import DMatrix, Booster, _expect @@ -97,7 +97,8 @@ def concat(value): # pylint: disable=too-many-return-statements return sparse.concatenate(value, axis=0) if PANDAS_INSTALLED and isinstance(value[0], (DataFrame, Series)): return pandas_concat(value, axis=0) - if CUDF_INSTALLED and isinstance(value[0], (CUDF_DataFrame, CUDF_Series)): + if lazy_isinstance(value[0], 'cudf.core.dataframe', 'DataFrame') or \ + lazy_isinstance(value[0], 'cudf.core.series', 'Series'): return CUDF_concat(value, axis=0) if lazy_isinstance(value[0], 'cupy.core.core', 'ndarray'): import cupy # pylint: disable=import-error @@ -461,6 +462,25 @@ def dispatched_train(worker_addr): return list(filter(lambda ret: ret is not None, results))[0] +def _direct_predict_impl(client, data, predict_fn): + if isinstance(data, da.Array): + predictions = client.submit( + da.map_blocks, + predict_fn, data, False, drop_axis=1, + dtype=numpy.float32 + ).result() + return predictions + if isinstance(data, dd.DataFrame): + predictions = client.submit( + dd.map_partitions, + predict_fn, data, True, + meta=dd.utils.make_meta({'prediction': 'f4'}) + ).result() + return predictions.iloc[:, 0] + raise TypeError('data of type: ' + str(type(data)) + + ' is not supported by 
direct prediction') + + def predict(client, model, data, *args, missing=numpy.nan): '''Run prediction with a trained booster. @@ -502,26 +522,19 @@ def predict(client, model, data, *args, missing=numpy.nan): def mapped_predict(partition, is_df): worker = distributed_get_worker() + booster.set_param({'nthread': worker.nthreads}) m = DMatrix(partition, missing=missing, nthread=worker.nthreads) predt = booster.predict(m, *args, validate_features=False) if is_df: - predt = DataFrame(predt, columns=['prediction']) + if lazy_isinstance(partition, 'cudf.core.dataframe', 'DataFrame'): + import cudf # pylint: disable=import-error + predt = cudf.DataFrame(predt, columns=['prediction']) + else: + predt = DataFrame(predt, columns=['prediction']) return predt - if isinstance(data, da.Array): - predictions = client.submit( - da.map_blocks, - mapped_predict, data, False, drop_axis=1, - dtype=numpy.float32 - ).result() - return predictions - if isinstance(data, dd.DataFrame): - predictions = client.submit( - dd.map_partitions, - mapped_predict, data, True, - meta=dd.utils.make_meta({'prediction': 'f4'}) - ).result() - return predictions.iloc[:, 0] + if isinstance(data, (da.Array, dd.DataFrame)): + return _direct_predict_impl(client, data, mapped_predict) # Prediction on dask DMatrix. 
worker_map = data.worker_map @@ -644,20 +657,7 @@ def mapped_predict(data, is_df): dtype=numpy.float32) return prediction - if isinstance(data, da.Array): - predictions = client.submit( - da.map_blocks, - mapped_predict, data, False, drop_axis=1, - dtype=numpy.float32 - ).result() - return predictions - if isinstance(data, dd.DataFrame): - predictions = client.submit( - dd.map_partitions, - mapped_predict, data, True, - meta=dd.utils.make_meta({'prediction': 'f4'}) - ).result() - return predictions.iloc[:, 0] + return _direct_predict_impl(client, data, mapped_predict) def _evaluation_matrices(client, validation_set, sample_weights, missing): diff --git a/tests/python-gpu/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask.py index 7cc45a428459..97eeb5bdf30f 100644 --- a/tests/python-gpu/test_gpu_with_dask.py +++ b/tests/python-gpu/test_gpu_with_dask.py @@ -44,10 +44,10 @@ def test_dask_dataframe(self): out = dxgb.train(client, {'tree_method': 'gpu_hist'}, dtrain=dtrain, evals=[(dtrain, 'X')], - num_boost_round=2) + num_boost_round=4) assert isinstance(out['booster'], dxgb.Booster) - assert len(out['history']['X']['rmse']) == 2 + assert len(out['history']['X']['rmse']) == 4 predictions = dxgb.predict(client, out, dtrain).compute() assert isinstance(predictions, np.ndarray) @@ -62,6 +62,20 @@ def test_dask_dataframe(self): cupy.testing.assert_allclose(single_node, predictions) cupy.testing.assert_allclose(single_node, series_predictions) + predt = dxgb.predict(client, out, X) + assert isinstance(predt, dd.Series) + + def is_df(part): + assert isinstance(part, cudf.DataFrame), part + return part + + predt.map_partitions( + is_df, + meta=dd.utils.make_meta({'prediction': 'f4'})) + + cupy.testing.assert_allclose( + predt.values.compute(), single_node) + @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_dask_array(self): diff --git a/tests/python/testing.py b/tests/python/testing.py index 708e5af4ca55..07b0f5b04d27 100644 --- 
a/tests/python/testing.py +++ b/tests/python/testing.py @@ -1,6 +1,6 @@ # coding: utf-8 from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED -from xgboost.compat import CUDF_INSTALLED, DASK_INSTALLED +from xgboost.compat import DASK_INSTALLED def no_sklearn(): @@ -46,6 +46,12 @@ def no_dask_cuda(): def no_cudf(): + try: + import cudf # noqa + CUDF_INSTALLED = True + except ImportError: + CUDF_INSTALLED = False + return {'condition': not CUDF_INSTALLED, 'reason': 'CUDF is not installed'}