From 7268b5f54a6942d1127c49d2723f44c97d366310 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 5 Apr 2023 11:13:40 -0700 Subject: [PATCH 1/9] Pin dask and distributed for release --- .github/workflows/pr.yaml | 2 +- .github/workflows/test.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 4 ++-- conda/recipes/custreamz/meta.yaml | 4 ++-- conda/recipes/dask-cudf/meta.yaml | 8 ++++---- conda/recipes/dask-cudf/run_test.sh | 8 ++++---- dependencies.yaml | 4 ++-- python/dask_cudf/pyproject.toml | 4 ++-- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 3d0cda92f5e..741fdd81b80 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -134,5 +134,5 @@ jobs: build_type: pull-request package-name: dask_cudf # Install the cudf we just built, and also test against latest dask/distributed/dask-cuda. - test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && python -m pip install --no-deps ./local-cudf-dep/cudf*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" + test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && python -m pip install --no-deps ./local-cudf-dep/cudf*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 660f46199b4..fac28a21e7c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -98,5 +98,5 @@ jobs: sha: ${{ inputs.sha }} package-name: dask_cudf # Test against latest dask/distributed/dask-cuda. - test-before: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" + test-before: "pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 890cb199419..2bc6b2cbad9 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -22,8 +22,8 @@ dependencies: - cxx-compiler - cython>=0.29,<0.30 - dask-cuda==23.4.* -- dask>=2023.1.1 -- distributed>=2023.1.1 +- dask==2023.3.2 +- distributed==2023.3.2.1 - dlpack>=0.5,<0.6.0a0 - doxygen=1.8.20 - fastavro>=0.22.9 diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 5fafa7464db..5606938993f 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -41,8 +41,8 @@ requirements: - python - streamz - cudf ={{ version }} - - dask >=2023.1.1 - - distributed >=2023.1.1 + - dask ==2023.3.2 + - distributed ==2023.3.2.1 - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf_kafka ={{ version }} diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index 79f1f09858a..8a87a79b3f2 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -37,14 +37,14 @@ requirements: host: - python - cudf ={{ version }} - - dask >=2023.1.1 - - distributed >=2023.1.1 + - dask ==2023.3.2 + - distributed ==2023.3.2.1 - cudatoolkit ={{ cuda_version }} run: - python - cudf ={{ version }} - - dask >=2023.1.1 - - distributed >=2023.1.1 + - dask ==2023.3.2 + - distributed ==2023.3.2.1 - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} test: diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh index 0c2f628dcf2..a9ff7431eb9 100644 --- a/conda/recipes/dask-cudf/run_test.sh +++ b/conda/recipes/dask-cudf/run_test.sh @@ -18,18 +18,18 @@ if [ "${ARCH}" = "aarch64" ]; then fi # Dask & Distributed option to install main(nightly) or `conda-forge` packages. -export INSTALL_DASK_MAIN=1 +export INSTALL_DASK_MAIN=0 # Dask version to install when `INSTALL_DASK_MAIN=0` -export DASK_STABLE_VERSION="2023.1.1" +export DASK_STABLE_VERSION="2023.3.2" # Install the conda-forge or nightly version of dask and distributed if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then rapids-logger "rapids-mamba-retry install -c dask/label/dev 'dask/label/dev::dask' 'dask/label/dev::distributed'" rapids-mamba-retry install -c dask/label/dev "dask/label/dev::dask" "dask/label/dev::distributed" else - rapids-logger "rapids-mamba-retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall" - rapids-mamba-retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall + rapids-logger "rapids-mamba-retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed==2023.3.2.1 conda-forge::dask-core==2023.3.2.1 --force-reinstall" + rapids-mamba-retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=="2023.3.2.1" conda-forge::dask-core=="2023.3.2.1" --force-reinstall fi logger "python -c 'import dask_cudf'" diff --git a/dependencies.yaml b/dependencies.yaml index 1bd664fc57d..04e6e40f55d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -382,8 +382,8 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - dask>=2023.1.1 - - distributed>=2023.1.1 + - dask==2023.3.2 + - distributed==2023.3.2.1 - output_types: pyproject packages: - &cudf cudf==23.4.* diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index c91a9bb3b85..358a7cbcd5e 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -20,8 +20,8 @@ requires-python = ">=3.8" dependencies = [ "cudf==23.4.*", "cupy-cuda11x>=9.5.0,<12.0.0a0", - "dask>=2023.1.1", - "distributed>=2023.1.1", + "dask==2023.3.2", + "distributed==2023.3.2.1", "fsspec>=0.6.0", "numpy>=1.21", "pandas>=1.3,<1.6.0dev0", From 6563440922f1369268ca85e5cf336ee3c6454a22 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 5 Apr 2023 12:04:31 -0700 Subject: [PATCH 2/9] Fix a dask-cudf error --- python/dask_cudf/dask_cudf/io/parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index f19c373150d..551a4561918 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -158,7 +158,7 @@ def _read_paths( # Build the column from `codes` directly # (since the category is often a larger dtype) codes = as_column( - partitions[i].keys.index(index2), + partitions[i].keys.get_loc(index2), length=len(df), ) df[name] = build_categorical_column( From 54e788983833c2ff2248b25729eccc29e6e4ba96 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 5 Apr 2023 14:36:55 -0500 Subject: [PATCH 3/9] Apply suggestions from code review --- conda/environments/all_cuda-118_arch-x86_64.yaml | 1 + conda/recipes/custreamz/meta.yaml | 1 + conda/recipes/dask-cudf/meta.yaml | 2 ++ dependencies.yaml | 1 + python/dask_cudf/dask_cudf/io/parquet.py | 2 +- python/dask_cudf/pyproject.toml | 1 + 6 files changed, 7 insertions(+), 1 deletion(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 2bc6b2cbad9..0a4596e9c0a 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -23,6 +23,7 @@ dependencies: - cython>=0.29,<0.30 - dask-cuda==23.4.* - dask==2023.3.2 +- dask-core==2023.3.2 - distributed==2023.3.2.1 - dlpack>=0.5,<0.6.0a0 - doxygen=1.8.20 diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 5606938993f..6c10369d1cd 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -42,6 +42,7 @@ requirements: - streamz - cudf ={{ version }} - dask ==2023.3.2 + - dask-core ==2023.3.2 - distributed ==2023.3.2.1 - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf_kafka ={{ version }} diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index 8a87a79b3f2..d8f93f720ee 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -38,12 +38,14 @@ requirements: - python - cudf ={{ version }} - dask ==2023.3.2 + - dask-core ==2023.3.2 - distributed ==2023.3.2.1 - cudatoolkit ={{ cuda_version }} run: - python - cudf ={{ version }} - dask ==2023.3.2 + - dask-core ==2023.3.2 - distributed ==2023.3.2.1 - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} diff --git a/dependencies.yaml b/dependencies.yaml index 04e6e40f55d..9d9fcc0c3a4 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -383,6 +383,7 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - dask==2023.3.2 + - dask-core==2023.3.2 - distributed==2023.3.2.1 - output_types: pyproject packages: diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 551a4561918..f19c373150d 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -158,7 +158,7 @@ def _read_paths( # Build the column from `codes` directly # (since the category is often a larger dtype) codes = as_column( - partitions[i].keys.get_loc(index2), + partitions[i].keys.index(index2), length=len(df), ) df[name] = build_categorical_column( diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 358a7cbcd5e..f6dd2cd4242 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "cudf==23.4.*", "cupy-cuda11x>=9.5.0,<12.0.0a0", "dask==2023.3.2", + "dask-core==2023.3.2", "distributed==2023.3.2.1", "fsspec>=0.6.0", "numpy>=1.21", From 46a8016d7c21c424538be997ad1ec46d0d8132f4 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 5 Apr 2023 14:44:30 -0500 Subject: [PATCH 4/9] Update python/dask_cudf/pyproject.toml --- python/dask_cudf/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index f6dd2cd4242..11e029c0a56 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -20,8 +20,8 @@ requires-python = ">=3.8" dependencies = [ "cudf==23.4.*", "cupy-cuda11x>=9.5.0,<12.0.0a0", - "dask==2023.3.2", "dask-core==2023.3.2", + "dask==2023.3.2", "distributed==2023.3.2.1", "fsspec>=0.6.0", "numpy>=1.21", From 1d95f75c932e9964bf8b70c75be183b9ae181672 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 5 Apr 2023 14:54:45 -0500 Subject: [PATCH 5/9] Update all_cuda-118_arch-x86_64.yaml --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 0a4596e9c0a..e7fa1fcc530 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -21,9 +21,9 @@ dependencies: - cupy>=9.5.0,<12.0.0a0 - cxx-compiler - cython>=0.29,<0.30 +- dask-core==2023.3.2 - dask-cuda==23.4.* - dask==2023.3.2 -- dask-core==2023.3.2 - distributed==2023.3.2.1 - dlpack>=0.5,<0.6.0a0 - doxygen=1.8.20 From a3ed98abcb1e08ec44821932de7281d32b422a9c Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 5 Apr 2023 15:35:01 -0500 Subject: [PATCH 6/9] Update python/dask_cudf/pyproject.toml --- python/dask_cudf/pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 11e029c0a56..358a7cbcd5e 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -20,7 +20,6 @@ requires-python = ">=3.8" dependencies = [ "cudf==23.4.*", "cupy-cuda11x>=9.5.0,<12.0.0a0", - "dask-core==2023.3.2", "dask==2023.3.2", "distributed==2023.3.2.1", "fsspec>=0.6.0", From 5179b8ee3865bd62828394424b52d0a7ae3c7ff2 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 5 Apr 2023 15:36:37 -0500 Subject: [PATCH 7/9] Update dependencies.yaml --- dependencies.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index 9d9fcc0c3a4..d11464ccdf4 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -383,8 +383,10 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - dask==2023.3.2 - - dask-core==2023.3.2 - distributed==2023.3.2.1 + - output_types: conda + packages: + - dask-core==2023.3.2 - output_types: pyproject packages: - &cudf cudf==23.4.* From 241b560a699b8d97d3e5c22f3cab3841932c729f Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 5 Apr 2023 16:11:20 -0500 Subject: [PATCH 8/9] Update conda/recipes/dask-cudf/run_test.sh --- conda/recipes/dask-cudf/run_test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh index a9ff7431eb9..d315e1d8a6d 100644 --- a/conda/recipes/dask-cudf/run_test.sh +++ b/conda/recipes/dask-cudf/run_test.sh @@ -28,8 +28,8 @@ if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then rapids-logger "rapids-mamba-retry install -c dask/label/dev 'dask/label/dev::dask' 'dask/label/dev::distributed'" rapids-mamba-retry install -c dask/label/dev "dask/label/dev::dask" "dask/label/dev::distributed" else - rapids-logger "rapids-mamba-retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed==2023.3.2.1 conda-forge::dask-core==2023.3.2.1 --force-reinstall" - rapids-mamba-retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=="2023.3.2.1" conda-forge::dask-core=="2023.3.2.1" --force-reinstall + rapids-logger "rapids-mamba-retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed==2023.3.2.1 conda-forge::dask-core==2023.3.2 --force-reinstall" + rapids-mamba-retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=="2023.3.2.1" conda-forge::dask-core=="2023.3.2" --force-reinstall fi logger "python -c 'import dask_cudf'" From d1a011433eb52d985055259ac7dc8b209e89e760 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 5 Apr 2023 16:37:42 -0500 Subject: [PATCH 9/9] Update dependencies.yaml --- dependencies.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index d11464ccdf4..d24cc927611 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -386,7 +386,7 @@ dependencies: - distributed==2023.3.2.1 - output_types: conda packages: - - dask-core==2023.3.2 + - dask-core==2023.3.2 # dask-core in conda is the actual package & dask is the meta package - output_types: pyproject packages: - &cudf cudf==23.4.*