From 94a0d096c162dec0640f40953c899fa8639155f8 Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Thu, 5 Oct 2023 09:18:26 -0700 Subject: [PATCH 01/14] Adds op threshold --- hvplot/converter.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/hvplot/converter.py b/hvplot/converter.py index 20cc2874a..74736fdb9 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -22,7 +22,7 @@ ) from holoviews.plotting.bokeh import OverlayPlot, colormap_generator from holoviews.plotting.util import process_cmap -from holoviews.operation import histogram +from holoviews.operation import histogram, apply_when from holoviews.streams import Buffer, Pipe from holoviews.util.transform import dim from packaging.version import Version @@ -212,6 +212,10 @@ class HoloViewsConverter: Whether to apply rasterization using the Datashader library, returning an aggregated Image (to be colormapped by the plotting backend) instead of individual points + op_threshold (default=None): + The threshold before applying the operation (datashade / rasterize); + if the number of points is below this value, the plot will + not be rasterized or datashaded. x_sampling/y_sampling (default=None): Specifies the smallest allowed sampling interval along the x/y axis. 
@@ -286,7 +290,7 @@ class HoloViewsConverter: _op_options = [ 'datashade', 'rasterize', 'x_sampling', 'y_sampling', - 'aggregator' + 'aggregator', "op_threshold" ] # Options specific to a particular plot type @@ -383,9 +387,10 @@ def __init__( logx=None, logy=None, loglog=None, hover=None, subplots=False, label=None, invert=False, stacked=False, colorbar=None, datashade=False, rasterize=False, downsample=None, - row=None, col=None, debug=False, framewise=True, - aggregator=None, projection=None, global_extent=None, - geo=False, precompute=False, flip_xaxis=None, flip_yaxis=None, + op_threshold=None, row=None, col=None, + debug=False, framewise=True, aggregator=None, + projection=None, global_extent=None, geo=False, + precompute=False, flip_xaxis=None, flip_yaxis=None, dynspread=False, hover_cols=[], x_sampling=None, y_sampling=None, project=False, tools=[], attr_labels=None, coastline=False, tiles=False, sort_date=True, @@ -473,6 +478,7 @@ def __init__( self.precompute = precompute self.x_sampling = x_sampling self.y_sampling = y_sampling + self.op_threshold = op_threshold # By type self.subplots = subplots @@ -1362,11 +1368,11 @@ def method_wrapper(ds, x, y): if self._dim_ranges.get('c', (None, None)) != (None, None): style['clim'] = self._dim_ranges['c'] - if self.geo and self.crs != self.output_projection: - import geoviews as gv - obj = gv.project(obj, projection=self.output_projection) - - processed = operation(obj, **opts) + if self.op_threshold is not None: + operation_instance = operation.instance(**opts) + processed = apply_when(obj, operation=operation_instance, predicate=lambda x: len(x) > self.op_threshold) + else: + processed = operation(obj, **opts) if self.dynspread: processed = dynspread(processed, max_px=self.kwds.get('max_px', 3), From a05acdeedc2b475e3ebd707cb89f2ca6859d8a14 Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Thu, 13 Jul 2023 21:57:13 -0700 Subject: [PATCH 02/14] Update docstring --- hvplot/converter.py | 8 +++++--- 1 file changed, 
5 insertions(+), 3 deletions(-) diff --git a/hvplot/converter.py b/hvplot/converter.py index 74736fdb9..f8e5fb44c 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -213,9 +213,11 @@ class HoloViewsConverter: returning an aggregated Image (to be colormapped by the plotting backend) instead of individual points op_threshold (default=None): - The threshold before applying the operation (datashade / rasterize); - if the number of points is below this value, the plot will - not be rasterized or datashaded. + The threshold before toggling the operation (datashade / rasterize); + if the number of individual points exceeds this value, the plot will + be rasterized or datashaded; else the plot with the original points + will be returned instead. If this is unset, the operation + will always be applied. x_sampling/y_sampling (default=None): Specifies the smallest allowed sampling interval along the x/y axis. From 12a04b9e759bd43fc856653d291cc9bdd79cbe3b Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Thu, 5 Oct 2023 09:19:01 -0700 Subject: [PATCH 03/14] Support gridded data --- hvplot/converter.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/hvplot/converter.py b/hvplot/converter.py index f8e5fb44c..4a2efdc1a 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -212,7 +212,7 @@ class HoloViewsConverter: Whether to apply rasterization using the Datashader library, returning an aggregated Image (to be colormapped by the plotting backend) instead of individual points - op_threshold (default=None): + aggregation_threshold (default=None): The threshold before toggling the operation (datashade / rasterize); if the number of individual points exceeds this value, the plot will be rasterized or datashaded; else the plot with the original points @@ -292,7 +292,7 @@ class HoloViewsConverter: _op_options = [ 'datashade', 'rasterize', 'x_sampling', 'y_sampling', - 'aggregator', "op_threshold" + 'aggregator', 
"aggregation_threshold" ] # Options specific to a particular plot type @@ -389,7 +389,7 @@ def __init__( logx=None, logy=None, loglog=None, hover=None, subplots=False, label=None, invert=False, stacked=False, colorbar=None, datashade=False, rasterize=False, downsample=None, - op_threshold=None, row=None, col=None, + aggregation_threshold=None, row=None, col=None, debug=False, framewise=True, aggregator=None, projection=None, global_extent=None, geo=False, precompute=False, flip_xaxis=None, flip_yaxis=None, @@ -480,7 +480,7 @@ def __init__( self.precompute = precompute self.x_sampling = x_sampling self.y_sampling = y_sampling - self.op_threshold = op_threshold + self.aggregation_threshold = aggregation_threshold # By type self.subplots = subplots @@ -1370,12 +1370,7 @@ def method_wrapper(ds, x, y): if self._dim_ranges.get('c', (None, None)) != (None, None): style['clim'] = self._dim_ranges['c'] - if self.op_threshold is not None: - operation_instance = operation.instance(**opts) - processed = apply_when(obj, operation=operation_instance, predicate=lambda x: len(x) > self.op_threshold) - else: - processed = operation(obj, **opts) - + processed = self._aggregate_obj(operation, obj, opts) if self.dynspread: processed = dynspread(processed, max_px=self.kwds.get('max_px', 3), threshold=self.kwds.get('threshold', 0.5)) @@ -1385,6 +1380,24 @@ def method_wrapper(ds, x, y): layers = _transfer_opts_cur_backend(layers) return layers + def _aggregate_obj(self, operation, obj, opts): + def exceeds_aggregation_threshold(plot): + vdim = plot.vdims[0].name + data = plot.data[vdim] + data_size = np.size(data) + return data_size > self.aggregation_threshold + + if self.aggregation_threshold is not None: + operation_instance = operation.instance(**opts) + processed = apply_when( + obj, + operation=operation_instance, + predicate=exceeds_aggregation_threshold + ) + else: + processed = operation(obj, **opts) + return processed + def _get_opts(self, eltype, backend='bokeh', **custom): 
opts = dict(self._plot_opts, **dict(self._style_opts, **self._norm_opts)) opts.update(custom) From 5d379c777b3c63378af730a086c31a8804bddbff Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Thu, 5 Oct 2023 09:19:59 -0700 Subject: [PATCH 04/14] Add tests --- hvplot/converter.py | 2 +- hvplot/tests/testoperations.py | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/hvplot/converter.py b/hvplot/converter.py index 4a2efdc1a..7aa72d9fc 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -472,6 +472,7 @@ def __init__( ylim = (y0, y1) # Operations + self.aggregation_threshold = aggregation_threshold self.datashade = datashade self.rasterize = rasterize self.downsample = downsample @@ -480,7 +481,6 @@ def __init__( self.precompute = precompute self.x_sampling = x_sampling self.y_sampling = y_sampling - self.aggregation_threshold = aggregation_threshold # By type self.subplots = subplots diff --git a/hvplot/tests/testoperations.py b/hvplot/tests/testoperations.py index e19e5d89c..0441f9ba6 100644 --- a/hvplot/tests/testoperations.py +++ b/hvplot/tests/testoperations.py @@ -9,8 +9,11 @@ import numpy as np import pandas as pd -from holoviews import Store +from holoviews import Store, render from holoviews.element import Image, QuadMesh, ImageStack +from holoviews.core.spaces import DynamicMap +from holoviews.core.overlay import Overlay +from holoviews.element.chart import Scatter from holoviews.element.comparison import ComparisonTestCase from hvplot.converter import HoloViewsConverter from packaging.version import Version @@ -205,6 +208,27 @@ def test_rasterize_by(self): assert isinstance(plot, ImageStack) assert plot.opts["cmap"] == cc.palette['glasbey_category10'] + @parameterized.expand([('rasterize',), ('datashade',)]) + def test_aggregation_threshold(self, operation): + df = pd.DataFrame( + np.random.multivariate_normal((0, 0), [[0.1, 0.1], [0.1, 1.0]], (5000,)) + ) + dmap = df.hvplot.scatter("0", "1", 
aggregation_threshold=1000) + assert isinstance(dmap, DynamicMap) + + render(dmap) # trigger dynamicmap + overlay = dmap.items()[0][1] + assert isinstance(overlay, Overlay) + + image = overlay.get(0) + assert isinstance(image, Image) + assert image.data["0_1 Count"].size + + scatter = overlay.get(1) + assert isinstance(scatter, Scatter) + assert len(scatter.data) == 0 + + class TestChart2D(ComparisonTestCase): def setUp(self): From ebfd5625cf528f2604fb1830fed45d8297b76b2f Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Mon, 17 Jul 2023 08:39:09 -0400 Subject: [PATCH 05/14] Use partial --- hvplot/converter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hvplot/converter.py b/hvplot/converter.py index 7aa72d9fc..73912afc1 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -1388,10 +1388,9 @@ def exceeds_aggregation_threshold(plot): return data_size > self.aggregation_threshold if self.aggregation_threshold is not None: - operation_instance = operation.instance(**opts) processed = apply_when( obj, - operation=operation_instance, + operation=partial(operation, **opts), predicate=exceeds_aggregation_threshold ) else: From fcf95e59175a29eb69212c03ebed477493f33509 Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Thu, 5 Oct 2023 09:46:09 -0700 Subject: [PATCH 06/14] Address comments --- hvplot/converter.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hvplot/converter.py b/hvplot/converter.py index 73912afc1..a2663098e 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -216,8 +216,9 @@ class HoloViewsConverter: The threshold before toggling the operation (datashade / rasterize); if the number of individual points exceeds this value, the plot will be rasterized or datashaded; else the plot with the original points - will be returned instead. If this is unset, the operation - will always be applied. + will be returned instead. 
If this is unset and rasterize/datashade=True, + the plot will be rasterized or datashaded, regardless of the number of + points. x_sampling/y_sampling (default=None): Specifies the smallest allowed sampling interval along the x/y axis. @@ -292,7 +293,7 @@ class HoloViewsConverter: _op_options = [ 'datashade', 'rasterize', 'x_sampling', 'y_sampling', - 'aggregator', "aggregation_threshold" + 'downsample', 'aggregator', 'aggregation_threshold' ] # Options specific to a particular plot type From f14bcad4565a1fbe61909ed793ebea6ba23d339a Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Fri, 6 Oct 2023 11:03:16 -0700 Subject: [PATCH 07/14] Rename --- hvplot/converter.py | 20 ++++++++++---------- hvplot/tests/testoperations.py | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hvplot/converter.py b/hvplot/converter.py index 92deae9c3..030586653 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -212,7 +212,7 @@ class HoloViewsConverter: Whether to apply rasterization using the Datashader library, returning an aggregated Image (to be colormapped by the plotting backend) instead of individual points - aggregation_threshold (default=None): + resample_when (default=None): The threshold before toggling the operation (datashade / rasterize); if the number of individual points exceeds this value, the plot will be rasterized or datashaded; else the plot with the original points @@ -293,7 +293,7 @@ class HoloViewsConverter: _op_options = [ 'datashade', 'rasterize', 'x_sampling', 'y_sampling', - 'downsample', 'aggregator', 'aggregation_threshold' + 'downsample', 'aggregator', 'resample_when' ] # Options specific to a particular plot type @@ -390,7 +390,7 @@ def __init__( logx=None, logy=None, loglog=None, hover=None, subplots=False, label=None, invert=False, stacked=False, colorbar=None, datashade=False, rasterize=False, downsample=None, - aggregation_threshold=None, row=None, col=None, + resample_when=None, row=None, col=None, debug=False, 
framewise=True, aggregator=None, projection=None, global_extent=None, geo=False, precompute=False, flip_xaxis=None, flip_yaxis=None, @@ -474,7 +474,7 @@ def __init__( ylim = (y0, y1) # Operations - self.aggregation_threshold = aggregation_threshold + self.resample_when = resample_when self.datashade = datashade self.rasterize = rasterize self.downsample = downsample @@ -1373,7 +1373,7 @@ def method_wrapper(ds, x, y): if self._dim_ranges.get('c', (None, None)) != (None, None): style['clim'] = self._dim_ranges['c'] - processed = self._aggregate_obj(operation, obj, opts) + processed = self._resample_obj(operation, obj, opts) if self.dynspread: processed = dynspread(processed, max_px=self.kwds.get('max_px', 3), threshold=self.kwds.get('threshold', 0.5)) @@ -1383,18 +1383,18 @@ def method_wrapper(ds, x, y): layers = _transfer_opts_cur_backend(layers) return layers - def _aggregate_obj(self, operation, obj, opts): - def exceeds_aggregation_threshold(plot): + def _resample_obj(self, operation, obj, opts): + def exceeds_resample_when(plot): vdim = plot.vdims[0].name data = plot.data[vdim] data_size = np.size(data) - return data_size > self.aggregation_threshold + return data_size > self.resample_when - if self.aggregation_threshold is not None: + if self.resample_when is not None: processed = apply_when( obj, operation=partial(operation, **opts), - predicate=exceeds_aggregation_threshold + predicate=exceeds_resample_when ) else: processed = operation(obj, **opts) diff --git a/hvplot/tests/testoperations.py b/hvplot/tests/testoperations.py index 1ffa3c98e..7447e8538 100644 --- a/hvplot/tests/testoperations.py +++ b/hvplot/tests/testoperations.py @@ -209,11 +209,11 @@ def test_rasterize_by(self): assert plot.opts["cmap"] == cc.palette['glasbey_category10'] @parameterized.expand([('rasterize',), ('datashade',)]) - def test_aggregation_threshold(self, operation): + def test_apply_when(self, operation): df = pd.DataFrame( np.random.multivariate_normal((0, 0), [[0.1, 0.1], [0.1, 
1.0]], (5000,)) ) - dmap = df.hvplot.scatter("0", "1", aggregation_threshold=1000) + dmap = df.hvplot.scatter("0", "1", apply_when=1000) assert isinstance(dmap, DynamicMap) render(dmap) # trigger dynamicmap From 8b53b20ced041a2cb02ed884f5cc15c6f3aad96f Mon Sep 17 00:00:00 2001 From: bikegeek <3753118+bikegeek@users.noreply.github.com> Date: Sun, 8 Oct 2023 07:45:20 -0600 Subject: [PATCH 08/14] Feature reference gallery (#999) Co-authored-by: maximlt --- examples/reference/pandas/kde.ipynb | 2 +- examples/reference/pandas/line.ipynb | 2 +- examples/reference/xarray/bar.ipynb | 76 ++++++++++++++++++++++++++ examples/reference/xarray/hist.ipynb | 66 ++++++++++++++++++++++ examples/reference/xarray/kde.ipynb | 53 ++++++++++++++++++ examples/reference/xarray/line.ipynb | 66 ++++++++++++++++++++++ examples/reference/xarray/violin.ipynb | 53 ++++++++++++++++++ 7 files changed, 316 insertions(+), 2 deletions(-) create mode 100644 examples/reference/xarray/bar.ipynb create mode 100644 examples/reference/xarray/hist.ipynb create mode 100644 examples/reference/xarray/kde.ipynb create mode 100644 examples/reference/xarray/line.ipynb create mode 100644 examples/reference/xarray/violin.ipynb diff --git a/examples/reference/pandas/kde.ipynb b/examples/reference/pandas/kde.ipynb index 6078cee74..8697119f6 100644 --- a/examples/reference/pandas/kde.ipynb +++ b/examples/reference/pandas/kde.ipynb @@ -80,5 +80,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/examples/reference/pandas/line.ipynb b/examples/reference/pandas/line.ipynb index fcab731c0..c1246d362 100644 --- a/examples/reference/pandas/line.ipynb +++ b/examples/reference/pandas/line.ipynb @@ -44,5 +44,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/examples/reference/xarray/bar.ipynb b/examples/reference/xarray/bar.ipynb new file mode 100644 index 000000000..74d4641e9 --- /dev/null +++ b/examples/reference/xarray/bar.ipynb @@ -0,0 +1,76 @@ +{ + 
"cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "5d0ed0f8-ce26-4c54-9161-d34ca73ae716", + "metadata": {}, + "outputs": [], + "source": [ + "import hvplot.xarray # noqa\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "id": "e3f7e297-9576-4c36-9a9f-a4200ccf2d36", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "A `bar` plot represents **categorical data** with rectangular bars with heights proportional to the **numerical values** that they represent.\n", + "The x-axis represents the categories and the y axis represents the numerical value scale.\n", + "The bars are of equal width which allows for instant comparison of data." + ] + }, + { + "cell_type": "markdown", + "id": "af429bbb-9a65-4b5d-b447-ce50b35dfedc", + "metadata": {}, + "source": [ + "## Data\n", + "\n", + "Let's load some data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8db8fff7-b85f-49ea-b281-4a4d86e0f3ee", + "metadata": {}, + "outputs": [], + "source": [ + "ds = xr.tutorial.open_dataset('air_temperature').load()\n", + "air = ds.air\n", + "air1d = air.sel(lon=285.,lat=40.).groupby('time.month').mean()\n", + "air1d" + ] + }, + { + "cell_type": "markdown", + "id": "629c5172-9cba-4f5f-bf3b-67e0a390385b", + "metadata": {}, + "source": [ + "## Basic Bar Plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dfd1afa2-e310-4630-bd30-96e390078caf", + "metadata": {}, + "outputs": [], + "source": [ + "air1d.hvplot.bar(y='air', height=500, title=\"Air Temperature by Month\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/reference/xarray/hist.ipynb b/examples/reference/xarray/hist.ipynb new file mode 100644 index 000000000..9cf6dfd86 --- /dev/null +++ b/examples/reference/xarray/hist.ipynb @@ -0,0 +1,66 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "import hvplot.xarray # noqa\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`hist` is often a good way to start looking at data to get a sense of the distribution. Similar methods include [`kde`](kde.ipynb) (also available as `density`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = xr.tutorial.open_dataset('air_temperature').load()\n", + "air = ds.air\n", + "air1d = air.sel(lon=285.,lat=40.)\n", + "air1d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "air1d.hvplot.hist()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Customize the plot by changing the title and bar color." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "air1d.hvplot.hist(title=\"Air Temperature over time at lat=40,lon285\", color='gray')" + ] + } + ], + "metadata": { + "language_info": { + "name": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/reference/xarray/kde.ipynb b/examples/reference/xarray/kde.ipynb new file mode 100644 index 000000000..615442feb --- /dev/null +++ b/examples/reference/xarray/kde.ipynb @@ -0,0 +1,53 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2ccf9fd5-9d10-4522-961d-7e8d236213b2", + "metadata": {}, + "outputs": [], + "source": [ + "import hvplot.xarray # noqa\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "id": "f7ed6c65-2280-4741-b89b-721110628547", + "metadata": {}, + "source": [ + "Kernel density estimate (`kde`) provides a mechanism for showing the distribution and spread of the data. In `hvplot` the method is exposed both as `kde` and `density`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8680d48-02b1-4480-96e3-d0fd7807a804", + "metadata": {}, + "outputs": [], + "source": [ + "ds = xr.tutorial.open_dataset('air_temperature').load()\n", + "air = ds.air\n", + "air1d = air.sel(lat=[25, 50, 75])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f33d3355-deaa-4bdf-befa-af20d2e46d7a", + "metadata": {}, + "outputs": [], + "source": [ + "air1d.hvplot.kde('air', by='lat', alpha=0.5)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/reference/xarray/line.ipynb b/examples/reference/xarray/line.ipynb new file mode 100644 index 000000000..2cf211358 --- /dev/null +++ b/examples/reference/xarray/line.ipynb @@ -0,0 +1,66 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import hvplot.xarray # noqa\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`line` is useful when data is continuous and has a continuous axis." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = xr.tutorial.open_dataset('air_temperature').load()\n", + "air = ds.air\n", + "air1d = air.sel(lon=285.,lat=40.)\n", + "air1d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "air1d.hvplot.line()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Customize the plot by changing the title and line color." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "air1d.hvplot.line(title=\"Air Temperature over time at lat=40,lon285\",line_color='gray')" + ] + } + ], + "metadata": { + "language_info": { + "name": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/reference/xarray/violin.ipynb b/examples/reference/xarray/violin.ipynb new file mode 100644 index 000000000..62aa8cdbd --- /dev/null +++ b/examples/reference/xarray/violin.ipynb @@ -0,0 +1,53 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2ccf9fd5-9d10-4522-961d-7e8d236213b2", + "metadata": {}, + "outputs": [], + "source": [ + "import hvplot.xarray # noqa\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "id": "f7ed6c65-2280-4741-b89b-721110628547", + "metadata": {}, + "source": [ + "`violin` plots are similar to box plots, but provide a better sense of the distribution of data. Note that `violin` plots depend on the `scipy` library." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0f6ecc7-0f6a-4590-9c98-da10a674b9b0", + "metadata": {}, + "outputs": [], + "source": [ + "ds = xr.tutorial.open_dataset('air_temperature').load()\n", + "air = ds.air\n", + "air" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a137994-0a9d-4a75-8192-1e4bb4ead6c5", + "metadata": {}, + "outputs": [], + "source": [ + "air.hvplot.violin(y='air', by='lat', color='lat', cmap='Category20', title=\"Air Temperature vs. 
latitude\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From e255f44acc9483f1b266626a369820c98f8509d8 Mon Sep 17 00:00:00 2001 From: Andrew <15331990+ahuang11@users.noreply.github.com> Date: Sun, 8 Oct 2023 06:46:13 -0700 Subject: [PATCH 09/14] Adds fugue integration (#1102) Co-authored-by: maximlt --- hvplot/fugue.py | 62 +++++++++++++++++++++++++++++++++++++++ hvplot/tests/testfugue.py | 46 +++++++++++++++++++++++++++++ hvplot/util.py | 2 +- setup.py | 1 + 4 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 hvplot/fugue.py create mode 100644 hvplot/tests/testfugue.py diff --git a/hvplot/fugue.py b/hvplot/fugue.py new file mode 100644 index 000000000..65a18ee69 --- /dev/null +++ b/hvplot/fugue.py @@ -0,0 +1,62 @@ +""" +Experimental support for fugue. +""" +from typing import Any, Dict, Tuple + +import panel as _pn + +from . import hvPlotTabular, post_patch +from .util import _fugue_ipython + +def patch(name="hvplot", extension="bokeh", logo=False): + try: + from fugue import DataFrames, Outputter + from fugue.extensions import namespace_candidate, parse_outputter + except: + raise ImportError( + 'Could not add fugue support as it could not be imported. ' + 'Please make sure you have installed fugue in your environment.' + ) + + import hvplot.pandas # noqa: F401 + + class _Visualize(Outputter): + def __init__(self, func: str) -> None: + super().__init__() + self._func = func + getattr(hvPlotTabular, func) # ensure the func exists + + def process(self, dfs: DataFrames) -> None: + """ + Process the dataframes and output the result as + a pn.Column. 
+ + Parameters: + ----------- + dfs: fugue.DataFrames + """ + charts = [] + for df in dfs.values(): + params = dict(self.params) + opts: Dict[str, Any] = params.pop("opts", {}) + chart = getattr(df.as_pandas().hvplot, self._func)(**params).opts(**opts) + charts.append(chart) + col = _pn.Column(*charts) + try: + if not _fugue_ipython: + get_ipython() + except NameError: + col.show() # in script + else: + from IPython.display import display + display(col) # in notebook + + + @parse_outputter.candidate(namespace_candidate(name, lambda x: isinstance(x, str))) + def _parse_hvplot(obj: Tuple[str, str]) -> Outputter: + return _Visualize(obj[1]) + + post_patch(extension, logo) + + +patch() diff --git a/hvplot/tests/testfugue.py b/hvplot/tests/testfugue.py new file mode 100644 index 000000000..7d4fe014d --- /dev/null +++ b/hvplot/tests/testfugue.py @@ -0,0 +1,46 @@ +"""Fugue test suite""" + +import hvplot +import pandas as pd +import pytest + +# Patch required before importing hvplot.fugue +hvplot.util._fugue_ipython = True + +try: + import fugue.api as fa + import hvplot.fugue # noqa: F401 +except: + pytest.skip(allow_module_level=True) + + +@pytest.fixture +def table(): + df = pd.DataFrame( + { + "g": ["a", "b", "a", "b", "a", "b"], + "x": [1, 2, 3, 4, 5, 6], + "y": [1, 2, 3, 4, 5, 6], + } + ) + return df + + +def test_fugure_ipython_line(table, capsys): + """hvplot works with Fugue""" + fa.fugue_sql( + """ + OUTPUT table USING hvplot:line( + x="x", + y="y", + by="g", + size=100, + opts={"width": 500, "height": 500} + ) + """ + ) + # Check that the output contains the following: + # Column + # [0] HoloViews(NdOverlay) + output = capsys.readouterr().out + assert output == 'Column\n [0] HoloViews(NdOverlay)\n' diff --git a/hvplot/util.py b/hvplot/util.py index 297b7cea7..a862d348e 100644 --- a/hvplot/util.py +++ b/hvplot/util.py @@ -25,7 +25,7 @@ bokeh_version = Version(bokeh.__version__) bokeh3 = bokeh_version >= Version("3.0") param2 = Version(param.__version__) >= 
Version("2.0rc4") - +_fugue_ipython = None # To be set to True in tests to mock ipython def with_hv_extension(func, extension='bokeh', logo=False): """If hv.extension is not loaded, load before calling function""" diff --git a/setup.py b/setup.py index 63f1d95b1..06cd312ca 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ def get_setup_version(reponame): 'ipywidgets', 'dask', 'polars', + 'fugue', ] # Dependencies required to run the notebooks From f05f3049eceb93a2694d10c66c4cbe3cbe83e6c4 Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Mon, 9 Oct 2023 14:30:00 -0700 Subject: [PATCH 10/14] Rename properly --- hvplot/converter.py | 3 ++- hvplot/tests/testoperations.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/hvplot/converter.py b/hvplot/converter.py index 030586653..9e4c81b64 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -1300,6 +1300,7 @@ def method_wrapper(ds, x, y): opts['x_range'] = self._plot_opts['xlim'] layers = downsample1d(obj, **opts) layers = _transfer_opts_cur_backend(layers) + processed = self._resample_obj(downsample1d, obj, opts) return layers try: @@ -1362,7 +1363,7 @@ def method_wrapper(ds, x, y): opts['cnorm'] = self._plot_opts['cnorm'] if 'rescale_discrete_levels' in self._plot_opts: opts['rescale_discrete_levels'] = self._plot_opts['rescale_discrete_levels'] - else: + elif self.rasterize: operation = rasterize if Version(hv.__version__) < Version('1.18.0a1'): eltype = 'Image' diff --git a/hvplot/tests/testoperations.py b/hvplot/tests/testoperations.py index 7447e8538..df8c63dec 100644 --- a/hvplot/tests/testoperations.py +++ b/hvplot/tests/testoperations.py @@ -208,12 +208,12 @@ def test_rasterize_by(self): assert isinstance(plot, ImageStack) assert plot.opts["cmap"] == cc.palette['glasbey_category10'] - @parameterized.expand([('rasterize',), ('datashade',)]) - def test_apply_when(self, operation): + @parameterized.expand([('rasterize',), ('datashade',), ('downsample',)]) + def 
test_resample_when(self, operation): df = pd.DataFrame( np.random.multivariate_normal((0, 0), [[0.1, 0.1], [0.1, 1.0]], (5000,)) ) - dmap = df.hvplot.scatter("0", "1", apply_when=1000) + dmap = df.hvplot.scatter("0", "1", resample_when=1000, **{operation: True}) assert isinstance(dmap, DynamicMap) render(dmap) # trigger dynamicmap From cde802ce766817eef06a8d43741c4c870aa405fd Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Tue, 10 Oct 2023 08:26:12 -0700 Subject: [PATCH 11/14] Add resample_when support for downsample --- hvplot/converter.py | 3 +-- hvplot/tests/testoperations.py | 29 ++++++++++++++++++++++++----- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/hvplot/converter.py b/hvplot/converter.py index 9e4c81b64..eb5b7253c 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -1298,9 +1298,8 @@ def method_wrapper(ds, x, y): opts['x_sampling'] = self.x_sampling if self._plot_opts.get('xlim') is not None: opts['x_range'] = self._plot_opts['xlim'] - layers = downsample1d(obj, **opts) + layers = self._resample_obj(downsample1d, obj, opts) layers = _transfer_opts_cur_backend(layers) - processed = self._resample_obj(downsample1d, obj, opts) return layers try: diff --git a/hvplot/tests/testoperations.py b/hvplot/tests/testoperations.py index df8c63dec..1c77bf92a 100644 --- a/hvplot/tests/testoperations.py +++ b/hvplot/tests/testoperations.py @@ -208,12 +208,12 @@ def test_rasterize_by(self): assert isinstance(plot, ImageStack) assert plot.opts["cmap"] == cc.palette['glasbey_category10'] - @parameterized.expand([('rasterize',), ('datashade',), ('downsample',)]) - def test_resample_when(self, operation): + @parameterized.expand([('rasterize',), ('datashade',)]) + def test_operation_resample_when(self, operation): df = pd.DataFrame( np.random.multivariate_normal((0, 0), [[0.1, 0.1], [0.1, 1.0]], (5000,)) - ) - dmap = df.hvplot.scatter("0", "1", resample_when=1000, **{operation: True}) + ).rename({0: "x", 1: "y"}, axis=1) + dmap = 
df.hvplot.scatter("x", "y", resample_when=1000, **{operation: True}) assert isinstance(dmap, DynamicMap) render(dmap) # trigger dynamicmap @@ -222,7 +222,26 @@ def test_resample_when(self, operation): image = overlay.get(0) assert isinstance(image, Image) - assert image.data["0_1 Count"].size + assert len(image.data) > 0 + + scatter = overlay.get(1) + assert isinstance(scatter, Scatter) + assert len(scatter.data) == 0 + + def test_downsample_resample_when(self): + df = pd.DataFrame( + np.random.multivariate_normal((0, 0), [[0.1, 0.1], [0.1, 1.0]], (5000,)) + ).rename({0: "x", 1: "y"}, axis=1) + dmap = df.hvplot.scatter("x", "y", resample_when=1000, downsample=True) + assert isinstance(dmap, DynamicMap) + + render(dmap) # trigger dynamicmap + overlay = dmap.items()[0][1] + assert isinstance(overlay, Overlay) + + downsampled = overlay.get(0) + assert isinstance(downsampled, Scatter) + assert len(downsampled.data) > 0 scatter = overlay.get(1) assert isinstance(scatter, Scatter) From 4e7857f3fb2fddb59bd037f10de2621f6d6a49ad Mon Sep 17 00:00:00 2001 From: maximlt Date: Wed, 11 Oct 2023 18:49:21 +0200 Subject: [PATCH 12/14] leverage element length --- hvplot/converter.py | 5 +---- hvplot/tests/testoperations.py | 17 +++++++++-------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/hvplot/converter.py b/hvplot/converter.py index d25373af2..6042407db 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -1385,10 +1385,7 @@ def method_wrapper(ds, x, y): def _resample_obj(self, operation, obj, opts): def exceeds_resample_when(plot): - vdim = plot.vdims[0].name - data = plot.data[vdim] - data_size = np.size(data) - return data_size > self.resample_when + return len(plot) > self.resample_when if self.resample_when is not None: processed = apply_when( diff --git a/hvplot/tests/testoperations.py b/hvplot/tests/testoperations.py index 1c77bf92a..0ee723911 100644 --- a/hvplot/tests/testoperations.py +++ b/hvplot/tests/testoperations.py @@ -10,7 +10,7 @@ 
import pandas as pd from holoviews import Store, render -from holoviews.element import Image, QuadMesh, ImageStack +from holoviews.element import Image, QuadMesh, ImageStack, Points from holoviews.core.spaces import DynamicMap from holoviews.core.overlay import Overlay from holoviews.element.chart import Scatter @@ -228,11 +228,12 @@ def test_operation_resample_when(self, operation): assert isinstance(scatter, Scatter) assert len(scatter.data) == 0 - def test_downsample_resample_when(self): + @parameterized.expand([('points', Points), ('scatter', Scatter)]) + def test_downsample_resample_when(self, kind, eltype): df = pd.DataFrame( np.random.multivariate_normal((0, 0), [[0.1, 0.1], [0.1, 1.0]], (5000,)) ).rename({0: "x", 1: "y"}, axis=1) - dmap = df.hvplot.scatter("x", "y", resample_when=1000, downsample=True) + dmap = df.hvplot(kind=kind, x="x", y="y", resample_when=1000, downsample=True) assert isinstance(dmap, DynamicMap) render(dmap) # trigger dynamicmap @@ -240,12 +241,12 @@ def test_downsample_resample_when(self): assert isinstance(overlay, Overlay) downsampled = overlay.get(0) - assert isinstance(downsampled, Scatter) - assert len(downsampled.data) > 0 + assert isinstance(downsampled, eltype) + assert len(downsampled) > 0 - scatter = overlay.get(1) - assert isinstance(scatter, Scatter) - assert len(scatter.data) == 0 + element = overlay.get(1) + assert isinstance(element, eltype) + assert len(element) == 0 class TestChart2D(ComparisonTestCase): From 26384d19575ee6940147e2f4ee6125b716cf0d0a Mon Sep 17 00:00:00 2001 From: maximlt Date: Wed, 11 Oct 2023 19:11:15 +0200 Subject: [PATCH 13/14] raise if resample_when set without resampling operation --- hvplot/converter.py | 5 +++++ hvplot/tests/testoperations.py | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/hvplot/converter.py b/hvplot/converter.py index 6042407db..a331fd9bd 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -474,6 +474,11 @@ def __init__( ylim = (y0, y1) # Operations + if 
resample_when is not None and not any([rasterize, datashade, downsample]): + raise ValueError( + 'At least one resampling operation (rasterize, datashader, ' + 'downsample) must be enabled when resample_when is set.' + ) self.resample_when = resample_when self.datashade = datashade self.rasterize = rasterize diff --git a/hvplot/tests/testoperations.py b/hvplot/tests/testoperations.py index 0ee723911..47b237d96 100644 --- a/hvplot/tests/testoperations.py +++ b/hvplot/tests/testoperations.py @@ -8,6 +8,7 @@ import hvplot.pandas # noqa import numpy as np import pandas as pd +import pytest from holoviews import Store, render from holoviews.element import Image, QuadMesh, ImageStack, Points @@ -208,6 +209,13 @@ def test_rasterize_by(self): assert isinstance(plot, ImageStack) assert plot.opts["cmap"] == cc.palette['glasbey_category10'] + def test_resample_when_error_unset_operation(self): + with pytest.raises( + ValueError, + match='At least one resampling operation' + ): + self.df.hvplot(x='x', y='y', resample_when=10) + @parameterized.expand([('rasterize',), ('datashade',)]) def test_operation_resample_when(self, operation): df = pd.DataFrame( From 756a4b7f0575abcf79ee05e25c980482f0a0437f Mon Sep 17 00:00:00 2001 From: maximlt Date: Wed, 11 Oct 2023 19:11:46 +0200 Subject: [PATCH 14/14] rework the docstring --- hvplot/converter.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/hvplot/converter.py b/hvplot/converter.py index a331fd9bd..56546f2b9 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -185,7 +185,7 @@ class HoloViewsConverter: check_symmetric_max (default=1000000): Size above which to stop checking for symmetry by default on the data. 
- Downsampling options + Resampling options ------------------ aggregator (default=None): Aggregator to use when applying rasterize or datashade operation @@ -213,12 +213,9 @@ class HoloViewsConverter: returning an aggregated Image (to be colormapped by the plotting backend) instead of individual points resample_when (default=None): - The threshold before toggling the operation (datashade / rasterize); - if the number of individual points exceeds this value, the plot will - be rasterized or datashaded; else the plot with the original points - will be returned instead. If this is unset and rasterize/datashade=True, - the plot will be rasterized or datashaded, regardless of the number of - points. + Applies a resampling operation (datashade, rasterize or downsample) if + the number of individual data points present in the current zoom range + is above this threshold. The raw plot is displayed otherwise. x_sampling/y_sampling (default=None): Specifies the smallest allowed sampling interval along the x/y axis.