Skip to content

Commit

Permalink
Test with dask-expr (#6259)
Browse files Browse the repository at this point in the history
  • Loading branch information
hoxbro authored Jun 6, 2024
1 parent 17cc894 commit 3b24039
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 17 deletions.
3 changes: 0 additions & 3 deletions holoviews/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,9 @@ def pytest_collection_modifyitems(config, items):
with contextlib.suppress(Exception):
# From Dask 2023.7.1 they now automatically convert strings
# https://docs.dask.org/en/stable/changelog.html#v2023-7-1
# From Dask 2024.3.0 they now use `dask_expr` by default
# https://github.com/dask/dask/issues/10995
import dask

dask.config.set({"dataframe.convert-string": False})
dask.config.set({"dataframe.query-planning": False})


@pytest.fixture
Expand Down
8 changes: 4 additions & 4 deletions holoviews/tests/core/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,22 +607,22 @@ def test_dataset_empty_aggregate_with_spreadfn(self):
aggregated = Dataset([], kdims=self.kdims[:1], vdims=[d for vd in self.vdims for d in [vd, vd+'_std']])
self.compare_dataset(dataset.aggregate(['Gender'], np.mean, np.std), aggregated)

def test_dataset_groupby(self):
def test_dataset_groupby(self, sort=False):
group1 = {'Age':[10,16], 'Weight':[15,18], 'Height':[0.8,0.6]}
group2 = {'Age':[12], 'Weight':[10], 'Height':[0.8]}
grouped = HoloMap([('M', Dataset(group1, kdims=['Age'], vdims=self.vdims)),
('F', Dataset(group2, kdims=['Age'], vdims=self.vdims))],
kdims=['Gender'], sort=False)
kdims=['Gender'], sort=sort)
self.assertEqual(self.table.groupby(['Gender']), grouped)

def test_dataset_groupby_alias(self):
def test_dataset_groupby_alias(self, sort=False):
group1 = {'age':[10,16], 'weight':[15,18], 'height':[0.8,0.6]}
group2 = {'age':[12], 'weight':[10], 'height':[0.8]}
grouped = HoloMap([('M', Dataset(group1, kdims=[('age', 'Age')],
vdims=self.alias_vdims)),
('F', Dataset(group2, kdims=[('age', 'Age')],
vdims=self.alias_vdims))],
kdims=[('gender', 'Gender')], sort=False)
kdims=[('gender', 'Gender')], sort=sort)
self.assertEqual(self.alias_table.groupby('Gender'), grouped)

def test_dataset_groupby_second_dim(self):
Expand Down
48 changes: 45 additions & 3 deletions holoviews/tests/core/data/test_daskinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import numpy as np
import pandas as pd
import pytest
from packaging.version import Version

try:
Expand All @@ -14,18 +15,23 @@
from holoviews.core.util import pandas_version
from holoviews.util.transform import dim

from ...utils import dask_switcher
from .test_pandasinterface import BasePandasInterfaceTests

try:
import dask_expr
except ImportError:
dask_expr = None


class DaskDatasetTest(BasePandasInterfaceTests):
class _DaskDatasetTest(BasePandasInterfaceTests):
"""
Test of the pandas DaskDataset interface.
"""

datatype = 'dask'
data_type = dd.DataFrame

__test__ = True
__test__ = False

# Disabled tests for NotImplemented methods
def test_dataset_add_dimensions_values_hm(self):
Expand Down Expand Up @@ -128,3 +134,39 @@ def test_select_expression_lazy(self):
# Make sure that selecting by expression didn't cause evaluation
self.assertIsInstance(new_ds.data, dd.DataFrame)
self.assertEqual(new_ds.data.compute(), df[df.b == 10])


class DaskClassicDatasetTest(_DaskDatasetTest):
    """
    Runs the dask dataset tests with query planning switched off.
    """

    # The shared base class sets __test__ = False; opt this variant back in
    __test__ = True

    data_type = dd.core.DataFrame

    @dask_switcher(query=False)
    def setUp(self):
        # The switcher wraps setUp only; the base setUp then runs with
        # query planning turned off
        return super().setUp()


class DaskExprDatasetTest(_DaskDatasetTest):
    """
    Runs the dask dataset tests with query planning (dask-expr) switched on.
    """

    # Only enabled when dask_expr could be imported
    __test__ = dask_expr is not None

    @property
    def data_type(self):
        # Resolved on access rather than in the class body, because
        # dask_expr is None when its import failed
        return dask_expr.DataFrame

    @dask_switcher(query=True)
    def setUp(self):
        return super().setUp()

    def test_dataset_groupby(self):
        # With dask-expr, calling unique on a column returns the values
        # sorted, so the expected HoloMap has to be sorted as well
        super().test_dataset_groupby(sort=True)

    def test_dataset_groupby_alias(self):
        # With dask-expr, calling unique on a column returns the values
        # sorted, so the expected HoloMap has to be sorted as well
        super().test_dataset_groupby_alias(sort=True)

    @pytest.mark.xfail(reason="Not supported yet, see https://github.com/dask/dask-expr/issues/1076")
    def test_multi_dimension_groupby(self):
        super().test_multi_dimension_groupby()
2 changes: 2 additions & 0 deletions holoviews/tests/core/data/test_spatialpandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from holoviews.element import Path, Points, Polygons
from holoviews.element.comparison import ComparisonTestCase

from ...utils import dask_switcher
from .test_multiinterface import GeomTests


Expand Down Expand Up @@ -259,6 +260,7 @@ class DaskSpatialPandasTest(GeomTests, RoundTripTests):

__test__ = True

@dask_switcher(query=False, extras=["spatialpandas.dask"])
def setUp(self):
if spatialpandas is None:
raise SkipTest('DaskSpatialPandasInterface requires spatialpandas, skipping tests')
Expand Down
9 changes: 6 additions & 3 deletions holoviews/tests/element/test_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
from holoviews.element.comparison import ComparisonTestCase
from holoviews.element.selection import spatial_select_columnar

from ..utils import dask_switcher

try:
import datashader as ds
except ImportError:
Expand Down Expand Up @@ -658,9 +660,10 @@ def pandas_df(self):
}, dtype=float)


@pytest.fixture(scope="function")
def dask_df(self, pandas_df):
return dd.from_pandas(pandas_df, npartitions=2)
@pytest.fixture(scope="function", params=[
    pytest.param(False, id='dask-classic'),
    pytest.param(True, id='dask-expr'),
])
def dask_df(self, pandas_df, request):
    """
    Dask DataFrame built from ``pandas_df`` with two partitions.

    Parametrized over both dask dataframe implementations. The parameter
    is the value passed to ``dask_switcher(query=...)``: ``False`` turns
    query planning off (classic dask), ``True`` turns it on (dask-expr).
    The ids follow that mapping so that test reports name the backend
    that was actually used.
    """
    with dask_switcher(query=request.param):
        return dd.from_pandas(pandas_df, npartitions=2)

@pytest.fixture(scope="function")
def _method(self):
Expand Down
29 changes: 29 additions & 0 deletions holoviews/tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import logging
import os
import sys
from contextlib import contextmanager
from importlib import reload
from importlib.util import find_spec

import param
import pytest

from holoviews.element.comparison import ComparisonTestCase

Expand Down Expand Up @@ -111,3 +115,28 @@ def tearDown(self):
for level, msgs in messages.items():
for msg in msgs:
log.log(LEVELS[level], msg)


DASK_UNAVAILABLE = find_spec("dask") is None
EXPR_UNAVAILABLE = find_spec("dask_expr") is None


@contextmanager
def dask_switcher(*, query=False, extras=()):
    """
    Switch dask's ``dataframe.query-planning`` option (dask-expr) on or off.

    Written as a context manager so that the same helper can be used
    both in a ``with`` statement and as a decorator.

    ``query`` is the value written to the config key. ``extras`` names
    further modules to reload, after ``dask.dataframe``, when they have
    already been imported.

    The calling test is skipped when dask is unavailable, or when
    ``query`` is truthy and dask-expr is unavailable. Nothing is undone
    after the ``yield``.
    """
    if DASK_UNAVAILABLE:
        pytest.skip("dask is not available")
    if query and EXPR_UNAVAILABLE:
        pytest.skip("dask-expr is not available")

    import dask

    dask.config.set(**{"dataframe.query-planning": query})

    # Reload modules that were imported earlier -- presumably so that they
    # pick up the new query-planning value; modules not yet imported are
    # left alone.
    for name in ("dask.dataframe", *extras):
        if name not in sys.modules:
            continue
        reload(sys.modules[name])
    yield
5 changes: 3 additions & 2 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ download-data = 'python scripts/download_data.py'
install = 'python -m pip install --no-deps --disable-pip-version-check -e .'

[activation.env]
DASK_DATAFRAME__QUERY_PLANNING = "False"
PYTHONIOENCODING = "utf-8"

[environments]
Expand Down Expand Up @@ -87,6 +86,7 @@ test-unit = 'pytest holoviews/tests -n logical --dist loadgroup'
cftime = "*"
contourpy = "*"
dask-core = "*"
dask-expr = "*"
datashader = ">=0.11.1"
ffmpeg = "*"
ibis-sqlite = "*"
Expand All @@ -104,7 +104,7 @@ xyzservices = "*"
tsdownsample = "*" # currently not available on Windows

[feature.test-example.tasks]
test-example = 'pytest -n logical --dist loadscope --nbval-lax examples'
test-example = { cmd = 'pytest -n logical --dist loadscope --nbval-lax examples', env = { DASK_DATAFRAME__QUERY_PLANNING = "False" } }

[feature.test-example.dependencies]
nbval = "*"
Expand Down Expand Up @@ -185,6 +185,7 @@ python-kaleido = "*"
selenium = "*"

[feature.doc.activation.env]
DASK_DATAFRAME__QUERY_PLANNING = "False"
MOZ_HEADLESS = "1"
MPLBACKEND = "Agg"
PANEL_EMBED = "true"
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,12 @@ filterwarnings = [
"ignore:When grouping with a length-1 list::dask.dataframe.groupby", # https://github.com/dask/dask/issues/10572
"ignore:\\s*Pyarrow will become a required dependency of pandas:DeprecationWarning", # Will go away by itself in Pandas 3.0
"ignore:Passing a (SingleBlockManager|BlockManager) to (Series|GeoSeries|DataFrame|GeoDataFrame) is deprecated:DeprecationWarning", # https://github.com/holoviz/spatialpandas/issues/137
# 2024-02
"ignore:The current Dask DataFrame implementation is deprecated:DeprecationWarning", # https://github.com/dask/dask/issues/10917
# 2024-04
"ignore:No data was collected:coverage.exceptions.CoverageWarning", # https://github.com/pytest-dev/pytest-cov/issues/627
# 2024-05
"ignore:backend2gui is deprecated since IPython 8.24:DeprecationWarning", # https://github.com/holoviz/holoviews/pull/6227#issuecomment-2104401396
# 2024-06
"ignore:\\s*Dask dataframe query planning is disabled because dask-expr is not installed:FutureWarning",
]

[tool.coverage]
Expand Down

0 comments on commit 3b24039

Please sign in to comment.