Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into fix_release_degradations
Browse files Browse the repository at this point in the history
  • Loading branch information
hcho3 committed May 28, 2020
2 parents 0278f3a + 35e2205 commit f2583a3
Show file tree
Hide file tree
Showing 12 changed files with 65 additions and 71 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ if (USE_CUDA)
message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")

enable_language(CUDA)
if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 10.0)
message(FATAL_ERROR "CUDA version must be at least 10.0!")
endif()
set(GEN_CODE "")
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
message(STATUS "CUDA GEN_CODE: ${GEN_CODE}")
Expand Down
1 change: 0 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,6 @@ def TestPythonCPU() {
def docker_binary = "docker"
sh """
${dockerRun} ${container_type} ${docker_binary} tests/ci_build/test_python.sh cpu
${dockerRun} ${container_type} ${docker_binary} tests/ci_build/test_python.sh cpu-py35
"""
deleteDir()
}
Expand Down
5 changes: 2 additions & 3 deletions Jenkinsfile-win64
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pipeline {
steps {
script {
parallel ([
'build-win64-cuda9.0': { BuildWin64() }
'build-win64-cuda10.0': { BuildWin64() }
])
}
milestone ordinal: 2
Expand All @@ -40,7 +40,6 @@ pipeline {
script {
parallel ([
'test-win64-cpu': { TestWin64CPU() },
'test-win64-gpu-cuda9.0': { TestWin64GPU(cuda_target: 'cuda9') },
'test-win64-gpu-cuda10.0': { TestWin64GPU(cuda_target: 'cuda10_0') },
'test-win64-gpu-cuda10.1': { TestWin64GPU(cuda_target: 'cuda10_1') }
])
Expand All @@ -67,7 +66,7 @@ def checkoutSrcs() {
}

def BuildWin64() {
node('win64 && build') {
node('win64 && build && cuda10') {
unstash name: 'srcs'
echo "Building XGBoost for Windows AMD64 target..."
bat "nvcc --version"
Expand Down
3 changes: 1 addition & 2 deletions python-package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,10 @@ def run(self):
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8'],
python_requires='>=3.5',
python_requires='>=3.6',
url='https://github.com/dmlc/xgboost')

clean_up()
6 changes: 0 additions & 6 deletions python-package/xgboost/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,6 @@
except ImportError:
pass

if sys.version_info[:2] == (3, 5):
warnings.warn(
'Python 3.5 support is deprecated; XGBoost will require Python 3.6+ in the near future. ' +
'Consider upgrading to Python 3.6+.',
FutureWarning)

VERSION_FILE = os.path.join(os.path.dirname(__file__), 'VERSION')
with open(VERSION_FILE) as f:
__version__ = f.read().strip()
Expand Down
7 changes: 0 additions & 7 deletions python-package/xgboost/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,15 +105,8 @@ def lazy_isinstance(instance, module, name):

# cudf
try:
from cudf import DataFrame as CUDF_DataFrame
from cudf import Series as CUDF_Series
from cudf import concat as CUDF_concat
CUDF_INSTALLED = True
except ImportError:
CUDF_DataFrame = object
CUDF_Series = object
CUDF_MultiIndex = object
CUDF_INSTALLED = False
CUDF_concat = None

# sklearn
Expand Down
9 changes: 4 additions & 5 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@

from .compat import (
STRING_TYPES, DataFrame, py_str,
PANDAS_INSTALLED, CUDF_INSTALLED,
CUDF_DataFrame,
PANDAS_INSTALLED,
os_fspath, os_PathLike, lazy_isinstance)
from .libpath import find_lib_path

Expand Down Expand Up @@ -282,8 +281,8 @@ def _convert_unknown_data(data, meta=None, meta_type=None):

# Either object has cuda array interface or contains columns with interfaces
def _has_cuda_array_interface(data):
return hasattr(data, '__cuda_array_interface__') or (
CUDF_INSTALLED and isinstance(data, CUDF_DataFrame))
return hasattr(data, '__cuda_array_interface__') or \
lazy_isinstance(data, 'cudf.core.dataframe', 'DataFrame')


def _cudf_array_interfaces(df):
Expand Down Expand Up @@ -508,7 +507,7 @@ def set_uint_info(self, field, data):
def set_interface_info(self, field, data):
"""Set info type property into DMatrix."""
# If we are passed a dataframe, extract the series
if CUDF_INSTALLED and isinstance(data, CUDF_DataFrame):
if lazy_isinstance(data, 'cudf.core.dataframe', 'DataFrame'):
if len(data.columns) != 1:
raise ValueError(
'Expecting meta-info to contain a single column')
Expand Down
62 changes: 31 additions & 31 deletions python-package/xgboost/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from .compat import da, dd, delayed, get_client
from .compat import sparse, scipy_sparse
from .compat import PANDAS_INSTALLED, DataFrame, Series, pandas_concat
from .compat import CUDF_INSTALLED, CUDF_DataFrame, CUDF_Series, CUDF_concat
from .compat import CUDF_concat
from .compat import lazy_isinstance

from .core import DMatrix, Booster, _expect
Expand Down Expand Up @@ -97,7 +97,8 @@ def concat(value): # pylint: disable=too-many-return-statements
return sparse.concatenate(value, axis=0)
if PANDAS_INSTALLED and isinstance(value[0], (DataFrame, Series)):
return pandas_concat(value, axis=0)
if CUDF_INSTALLED and isinstance(value[0], (CUDF_DataFrame, CUDF_Series)):
if lazy_isinstance(value[0], 'cudf.core.dataframe', 'DataFrame') or \
lazy_isinstance(value[0], 'cudf.core.series', 'Series'):
return CUDF_concat(value, axis=0)
if lazy_isinstance(value[0], 'cupy.core.core', 'ndarray'):
import cupy # pylint: disable=import-error
Expand Down Expand Up @@ -461,6 +462,25 @@ def dispatched_train(worker_addr):
return list(filter(lambda ret: ret is not None, results))[0]


def _direct_predict_impl(client, data, predict_fn):
    '''Apply ``predict_fn`` chunk-wise to a dask array or dask dataframe.

    Parameters
    ----------
    client :
        Object whose ``submit(...)`` returns something with a ``result()``
        method.  Presumably a ``distributed.Client`` -- confirm with callers.
    data :
        A ``da.Array`` or a ``dd.DataFrame``.
    predict_fn :
        Callable handed to ``da.map_blocks`` / ``dd.map_partitions``.  It
        receives one extra positional argument after the chunk: ``False``
        for array blocks, ``True`` for dataframe partitions.

    Returns
    -------
    For ``da.Array`` input, the result of the submitted ``da.map_blocks``
    call (``drop_axis=1``, ``dtype=numpy.float32``).  For ``dd.DataFrame``
    input, the first column (``.iloc[:, 0]``) of the result of the submitted
    ``dd.map_partitions`` call.

    Raises
    ------
    TypeError
        If ``data`` is neither a ``da.Array`` nor a ``dd.DataFrame``.
    '''
    is_array = isinstance(data, da.Array)
    if not is_array and not isinstance(data, dd.DataFrame):
        raise TypeError('data of type: ' + str(type(data)) +
                        ' is not supported by direct prediction')

    if is_array:
        # Blocks are passed with the flag set to False (not a dataframe).
        return client.submit(
            da.map_blocks, predict_fn, data, False,
            drop_axis=1, dtype=numpy.float32).result()

    # Partitions are passed with the flag set to True (is a dataframe); the
    # declared meta has a single 'prediction' column of dtype 'f4'.
    frame = client.submit(
        dd.map_partitions, predict_fn, data, True,
        meta=dd.utils.make_meta({'prediction': 'f4'})).result()
    return frame.iloc[:, 0]


def predict(client, model, data, *args, missing=numpy.nan):
'''Run prediction with a trained booster.
Expand Down Expand Up @@ -502,26 +522,19 @@ def predict(client, model, data, *args, missing=numpy.nan):

def mapped_predict(partition, is_df):
    '''Predict on a single block/partition of the input collection.

    Closes over ``booster``, ``missing`` and ``args`` from the enclosing
    scope.

    Parameters
    ----------
    partition :
        One chunk of the input; it is wrapped in a ``DMatrix`` as-is.
    is_df : bool
        When true, the raw prediction is wrapped in a single-column
        dataframe named ``'prediction'``; otherwise it is returned as
        produced by ``booster.predict``.

    Returns
    -------
    The prediction for ``partition``: a ``cudf.DataFrame`` when ``is_df`` is
    true and the partition is a cudf dataframe, a ``DataFrame`` (the name
    imported from ``.compat``) when ``is_df`` is true otherwise, and the
    unwrapped ``booster.predict`` output when ``is_df`` is false.
    '''
    worker = distributed_get_worker()
    # Align the booster and the DMatrix with the thread count reported by
    # the worker that executes this task.
    booster.set_param({'nthread': worker.nthreads})
    m = DMatrix(partition, missing=missing, nthread=worker.nthreads)
    predt = booster.predict(m, *args, validate_features=False)
    if is_df:
        # Keep the output on the same dataframe backend as the input.
        # The type is identified by (module path, class name), the same
        # split every other cudf check in this package uses.  Passing
        # ('cudf', 'core.dataframe.DataFrame') put a dotted path in the
        # class-name slot.
        # NOTE(review): this assumes lazy_isinstance compares against the
        # type's __module__ / __name__ -- confirm in compat.py.
        if lazy_isinstance(partition, 'cudf.core.dataframe', 'DataFrame'):
            import cudf  # pylint: disable=import-error
            predt = cudf.DataFrame(predt, columns=['prediction'])
        else:
            predt = DataFrame(predt, columns=['prediction'])
    return predt

if isinstance(data, da.Array):
predictions = client.submit(
da.map_blocks,
mapped_predict, data, False, drop_axis=1,
dtype=numpy.float32
).result()
return predictions
if isinstance(data, dd.DataFrame):
predictions = client.submit(
dd.map_partitions,
mapped_predict, data, True,
meta=dd.utils.make_meta({'prediction': 'f4'})
).result()
return predictions.iloc[:, 0]
if isinstance(data, (da.Array, dd.DataFrame)):
return _direct_predict_impl(client, data, mapped_predict)

# Prediction on dask DMatrix.
worker_map = data.worker_map
Expand Down Expand Up @@ -644,20 +657,7 @@ def mapped_predict(data, is_df):
dtype=numpy.float32)
return prediction

if isinstance(data, da.Array):
predictions = client.submit(
da.map_blocks,
mapped_predict, data, False, drop_axis=1,
dtype=numpy.float32
).result()
return predictions
if isinstance(data, dd.DataFrame):
predictions = client.submit(
dd.map_partitions,
mapped_predict, data, True,
meta=dd.utils.make_meta({'prediction': 'f4'})
).result()
return predictions.iloc[:, 0]
return _direct_predict_impl(client, data, mapped_predict)


def _evaluation_matrices(client, validation_set, sample_weights, missing):
Expand Down
6 changes: 0 additions & 6 deletions tests/ci_build/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,6 @@ ENV PATH=/opt/python/bin:$PATH

ENV GOSU_VERSION 1.10

# Create new Conda environment with Python 3.5
RUN conda create -n py35 python=3.5 && \
source activate py35 && \
pip install numpy pytest scipy scikit-learn pandas matplotlib wheel kubernetes urllib3 graphviz && \
source deactivate

# Install Python packages in default env
RUN \
pip install pyyaml cpplint pylint astroid sphinx numpy scipy pandas matplotlib sh \
Expand Down
8 changes: 1 addition & 7 deletions tests/ci_build/test_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,8 @@ case "$suite" in
./runtests.sh
;;

cpu-py35)
source activate py35
install_xgboost
pytest -v -s --fulltrace tests/python
;;

*)
echo "Usage: $0 {gpu|mgpu|cudf|cpu|cpu-py35}"
echo "Usage: $0 {gpu|mgpu|cudf|cpu}"
exit 1
;;
esac
18 changes: 16 additions & 2 deletions tests/python-gpu/test_gpu_with_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ def test_dask_dataframe(self):
out = dxgb.train(client, {'tree_method': 'gpu_hist'},
dtrain=dtrain,
evals=[(dtrain, 'X')],
num_boost_round=2)
num_boost_round=4)

assert isinstance(out['booster'], dxgb.Booster)
assert len(out['history']['X']['rmse']) == 2
assert len(out['history']['X']['rmse']) == 4

predictions = dxgb.predict(client, out, dtrain).compute()
assert isinstance(predictions, np.ndarray)
Expand All @@ -62,6 +62,20 @@ def test_dask_dataframe(self):
cupy.testing.assert_allclose(single_node, predictions)
cupy.testing.assert_allclose(single_node, series_predictions)

predt = dxgb.predict(client, out, X)
assert isinstance(predt, dd.Series)

def is_df(part):
assert isinstance(part, cudf.DataFrame), part
return part

predt.map_partitions(
is_df,
meta=dd.utils.make_meta({'prediction': 'f4'}))

cupy.testing.assert_allclose(
predt.values.compute(), single_node)

@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_dask_array(self):
Expand Down
8 changes: 7 additions & 1 deletion tests/python/testing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# coding: utf-8
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
from xgboost.compat import CUDF_INSTALLED, DASK_INSTALLED
from xgboost.compat import DASK_INSTALLED


def no_sklearn():
Expand Down Expand Up @@ -46,6 +46,12 @@ def no_dask_cuda():


def no_cudf():
    """Report whether cudf is unavailable, as a ``condition``/``reason`` dict.

    The check is done by attempting ``import cudf`` at call time.

    Returns
    -------
    dict
        ``{'condition': <bool>, 'reason': 'CUDF is not installed'}`` where
        ``condition`` is ``True`` when the import raises ``ImportError``.
        Presumably unpacked into ``pytest.mark.skipif(**...)`` like the
        sibling ``no_*`` helpers -- confirm at the call sites.
    """
    try:
        import cudf  # noqa
    except ImportError:
        cudf_missing = True
    else:
        cudf_missing = False

    return {'condition': cudf_missing,
            'reason': 'CUDF is not installed'}

Expand Down

0 comments on commit f2583a3

Please sign in to comment.