diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py
index e8bc735e6ff1..1722670fdf5c 100644
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -7,6 +7,7 @@
 import os
 import re
 import sys
+import uuid
 import warnings
 import weakref
 from abc import ABC, abstractmethod
@@ -3143,3 +3144,9 @@ def get_split_value_histogram(
                 UserWarning,
             )
         return nph_stacked
+
+    def __dask_tokenize__(self) -> uuid.UUID:
+        # TODO: Implement proper tokenization to avoid unnecessary re-computation in
+        # Dask. However, default tokenization causes problems after
+        # https://github.com/dask/dask/pull/10883
+        return uuid.uuid4()
diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu
index f68ba9d6b14b..6a37d21ffe71 100644
--- a/tests/ci_build/Dockerfile.gpu
+++ b/tests/ci_build/Dockerfile.gpu
@@ -25,7 +25,7 @@ RUN \
     mamba create -y -n gpu_test -c rapidsai -c conda-forge -c nvidia \
         python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
         "nccl>=${NCCL_SHORT_VER}" \
-        dask=2024.1.1 \
+        dask \
         dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
         numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
         "pyspark>=3.4.0" cloudpickle cuda-python && \
diff --git a/tests/ci_build/Dockerfile.gpu_dev_ver b/tests/ci_build/Dockerfile.gpu_dev_ver
index a592d4891093..dfcb379642f2 100644
--- a/tests/ci_build/Dockerfile.gpu_dev_ver
+++ b/tests/ci_build/Dockerfile.gpu_dev_ver
@@ -28,7 +28,7 @@ RUN \
    mamba create -y -n gpu_test -c rapidsai-nightly -c conda-forge -c nvidia \
        python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cudatoolkit=$CUDA_VERSION_ARG \
        "nccl>=${NCCL_SHORT_VER}" \
-       dask=2024.1.1 \
+       dask \
        "dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \
        numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
        "pyspark>=3.4.0" cloudpickle cuda-python && \
diff --git a/tests/ci_build/conda_env/linux_cpu_test.yml b/tests/ci_build/conda_env/linux_cpu_test.yml
index d87d8fdef6b4..fd630c85a07f 100644
--- a/tests/ci_build/conda_env/linux_cpu_test.yml
+++ b/tests/ci_build/conda_env/linux_cpu_test.yml
@@ -17,8 +17,8 @@ dependencies:
 - scikit-learn
 - pandas
 - matplotlib
-- dask>=2022.6
-- distributed>=2022.6
+- dask
+- distributed
 - python-graphviz
 - hypothesis>=6.46
 - astroid
diff --git a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
index 905947d874ee..c872ec9d7b1d 100644
--- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
@@ -248,10 +248,10 @@ def test_categorical(self, local_cuda_client: Client) -> None:
         import dask_cudf
 
         X, y = make_categorical(local_cuda_client, 10000, 30, 13)
-        X = dask_cudf.from_dask_dataframe(X)
+        X = X.to_backend("cudf")
 
         X_onehot, _ = make_categorical(local_cuda_client, 10000, 30, 13, True)
-        X_onehot = dask_cudf.from_dask_dataframe(X_onehot)
+        X_onehot = X_onehot.to_backend("cudf")
         run_categorical(local_cuda_client, "hist", "cuda", X, X_onehot, y)
 
     @given(
@@ -383,9 +383,9 @@ def test_dask_classifier(self, model: str, local_cuda_client: Client) -> None:
 
         X_, y_, w_ = generate_array(with_weights=True)
         y_ = (y_ * 10).astype(np.int32)
-        X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X_))
-        y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y_))
-        w = dask_cudf.from_dask_dataframe(dd.from_dask_array(w_))
+        X = dd.from_dask_array(X_).to_backend("cudf")
+        y = dd.from_dask_array(y_).to_backend("cudf")
+        w = dd.from_dask_array(w_).to_backend("cudf")
         run_dask_classifier(X, y, w, model, "hist", "cuda", local_cuda_client, 10)
 
     def test_empty_dmatrix(self, local_cuda_client: Client) -> None:
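Note on the core.py change (illustrative, not part of the patch): dask.base.tokenize dispatches to an object's __dask_tokenize__ method when one is defined, so returning a fresh uuid.uuid4() makes every token unique and keeps the new Dask caching from ever conflating two distinct Booster objects, at the cost of possible re-computation. A minimal sketch of the protocol, using a hypothetical OpaqueHandle class rather than XGBoost's Booster:

import uuid

from dask.base import tokenize


class OpaqueHandle:
    """Hypothetical stand-in for an object without deterministic hashing."""

    def __dask_tokenize__(self) -> uuid.UUID:
        # A random token means repeated tokenize() calls never collide,
        # trading potential re-computation for correctness under the newer
        # Dask caching behaviour.
        return uuid.uuid4()


handle = OpaqueHandle()
assert tokenize(handle) != tokenize(handle)  # each call yields a fresh token

The test changes follow the same Dask-version bump: dask_cudf.from_dask_dataframe is swapped for DataFrame.to_backend("cudf"), the backend-dispatch conversion that recent dask / dask-cudf releases steer users toward.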