From cbdb56c56d0b2abd7fc3c32694d76122f708fb7b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Thu, 5 Oct 2023 13:15:45 +0200 Subject: [PATCH] Enable dask-expr for tpch queries (#1046) --- AB_environments/AB_baseline.conda.yaml | 1 + AB_environments/AB_sample.conda.yaml | 2 ++ ci/environment-git-tip.yml | 1 + ci/environment.yml | 1 + tests/benchmarks/test_tpch.py | 4 ++-- 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/AB_environments/AB_baseline.conda.yaml b/AB_environments/AB_baseline.conda.yaml index 3e2790038a..2c978d9ee3 100644 --- a/AB_environments/AB_baseline.conda.yaml +++ b/AB_environments/AB_baseline.conda.yaml @@ -13,6 +13,7 @@ dependencies: - pandas ==2.1.1 - dask ==2023.9.2 - distributed ==2023.9.2 + - dask-expr - dask-labextension ==7.0.0 - dask-ml ==2023.3.24 - fsspec ==2023.9.1 diff --git a/AB_environments/AB_sample.conda.yaml b/AB_environments/AB_sample.conda.yaml index 5099848a7b..8b6f96eaed 100644 --- a/AB_environments/AB_sample.conda.yaml +++ b/AB_environments/AB_sample.conda.yaml @@ -18,6 +18,7 @@ dependencies: - pandas ==2.1.1 - dask ==2023.9.2 - distributed ==2023.9.2 + - dask-expr - dask-labextension ==7.0.0 - dask-ml ==2023.3.24 - fsspec ==2023.9.1 @@ -56,3 +57,4 @@ dependencies: # Read README.md for troubleshooting. - git+https://github.com/dask/dask@da256320ef0167992f7183c3a275d092f5727f62 - git+https://github.com/dask/distributed@285893037fe9eac83f363611b4799168aabb3992 + - git+https://github.com/dask-contrib/dask-expr diff --git a/ci/environment-git-tip.yml b/ci/environment-git-tip.yml index fff12a054d..5c14da345c 100644 --- a/ci/environment-git-tip.yml +++ b/ci/environment-git-tip.yml @@ -6,3 +6,4 @@ dependencies: - git+https://github.com/dask/dask - git+https://github.com/dask/distributed - git+https://github.com/dask/zict + - git+https://github.com/dask-contrib/dask-expr diff --git a/ci/environment.yml b/ci/environment.yml index 43beec1e1e..5d09daebb1 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -8,6 +8,7 @@ dependencies: - pandas ==2.1.1 - dask ==2023.9.2 - distributed ==2023.9.2 + - dask-expr - dask-labextension ==7.0.0 - dask-ml ==2023.3.24 - fsspec ==2023.9.1 diff --git a/tests/benchmarks/test_tpch.py b/tests/benchmarks/test_tpch.py index 29402e3083..19e7133cbe 100644 --- a/tests/benchmarks/test_tpch.py +++ b/tests/benchmarks/test_tpch.py @@ -1,11 +1,11 @@ import os from datetime import datetime -import dask.dataframe as dd +import dask_expr as dd DATASETS = { "scale 100": "s3://coiled-runtime-ci/tpch_scale_100/", - "scale 1000": "s3://coiled-runtime-ci/tpch_scale_1000/", + "scale 1000": "s3://coiled-runtime-ci/tpch-scale-1000/", } enabled_dataset = os.getenv("TPCH_SCALE")