Add viewport downsample algorithm #6017

Merged
33 commits merged on Jan 30, 2024
Changes from all commits
Commits
33 commits
043f7fc
Add viewport downsample algorithm
hoxbro Dec 8, 2023
bf42891
pop non relevant stream ranges in RangeX and RangeY
hoxbro Dec 19, 2023
9141979
Rough implementation of finding first value outside of range
hoxbro Dec 19, 2023
0943eea
Remove unnecessary 0
hoxbro Dec 19, 2023
45d89da
Add hack to work with xlim
hoxbro Dec 19, 2023
b2311b6
Use slice instead of np.arange for nth
hoxbro Dec 19, 2023
3417c3d
Merge branch 'main' into viewport_downsample
hoxbro Dec 19, 2023
e0ffefd
Improve detection first values outside of viewport
hoxbro Dec 19, 2023
d033ca0
Use boolean logic instead
hoxbro Dec 20, 2023
fef1b73
Add select_mask_neighbor
hoxbro Dec 20, 2023
d226e6f
Add _select_mask_neighbor to dask interface
hoxbro Dec 20, 2023
ae35f9e
Add error message to logging
hoxbro Dec 20, 2023
3ceaf73
Correct if statement in dask interface
hoxbro Dec 20, 2023
7ef8c6c
Remove xlim hack
hoxbro Dec 20, 2023
5fc5e73
Nit change
hoxbro Dec 20, 2023
662cc90
Move x_range check down
hoxbro Dec 20, 2023
8111966
Send empty plot for real this time
hoxbro Dec 20, 2023
3c9d2f6
Add viewport-xlim as workaround for now
hoxbro Dec 22, 2023
6ba3b6f
Add width points to get a better approximation of the data in the y-d…
hoxbro Dec 22, 2023
06436c5
Add unit test
hoxbro Dec 22, 2023
9d6adc8
Add viewport options to docstring
hoxbro Dec 22, 2023
ee457ae
Update comment
hoxbro Dec 22, 2023
3e5eb01
Merge branch 'main' into viewport_downsample
hoxbro Jan 17, 2024
257e5fb
Only send the needed data during the initial rendering
hoxbro Jan 17, 2024
4b738d0
Merge branch 'main' into viewport_downsample
hoxbro Jan 17, 2024
aa477f4
Discard change callbacks until plot is fully initialized
philippjfr Jan 24, 2024
a44cc54
Update holoviews/plotting/bokeh/callbacks.py
hoxbro Jan 24, 2024
fda8958
Merge branch 'main' into viewport_downsample
hoxbro Jan 29, 2024
3ddb49c
Update tests
hoxbro Jan 29, 2024
2a64d0e
Small fixes
hoxbro Jan 30, 2024
d6b654f
Extract compute mask from _process and make it an options
hoxbro Jan 30, 2024
0e9ca5d
Ignore cuDF test
hoxbro Jan 30, 2024
1de1336
Merge branch 'main' into viewport_downsample
hoxbro Jan 30, 2024
12 changes: 12 additions & 0 deletions holoviews/core/data/cudf.py
@@ -231,6 +231,18 @@ def select_mask(cls, dataset, selection):
mask &= new_mask
return mask

@classmethod
def _select_mask_neighbor(cls, dataset, selection):
"""Runs select mask and expand the True values to include its neighbors

Example

select_mask = [False, False, True, True, False, False]
select_mask_neighbor = [False, True, True, True, True, False]

"""
raise NotImplementedError

@classmethod
def select(cls, dataset, selection_mask=None, **selection):
df = dataset.data
20 changes: 20 additions & 0 deletions holoviews/core/data/dask.py
@@ -154,6 +154,9 @@ def select_mask(cls, dataset, selection):
def select(cls, dataset, selection_mask=None, **selection):
df = dataset.data
if selection_mask is not None:
import dask.array as da
if isinstance(selection_mask, da.Array):
return df.loc[selection_mask]
return df[selection_mask]
selection_mask = cls.select_mask(dataset, selection)
indexed = cls.indexed(dataset, selection)
@@ -162,6 +165,23 @@ def select(cls, dataset, selection_mask=None, **selection):
return df[dataset.vdims[0].name].compute().iloc[0]
return df

@classmethod
def _select_mask_neighbor(cls, dataset, selection):
"""Runs select mask and expand the True values to include its neighbors

Example

select_mask = [False, False, True, True, False, False]
select_mask_neighbor = [False, True, True, True, True, False]

"""
mask = cls.select_mask(dataset, selection)
mask = mask.to_dask_array().compute_chunk_sizes()
extra = mask[1:] ^ mask[:-1]
mask[1:] |= extra
mask[:-1] |= extra
return mask

@classmethod
def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
index_dims = [dataset.get_dimension(d) for d in dimensions]
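Outside the diff, a minimal standalone sketch of what the dask variant of `_select_mask_neighbor` is doing, and why it calls `to_dask_array().compute_chunk_sizes()`: the shifted slices `mask[1:]` and `mask[:-1]` need known chunk sizes, which a boolean Series converted to a dask array does not have by default. The DataFrame and column below are made up for illustration.

```python
import dask.dataframe as dd
import numpy as np
import pandas as pd

# Small dask DataFrame purely for illustration.
df = dd.from_pandas(pd.DataFrame({"x": np.arange(10)}), npartitions=2)

# Boolean selection as a dask Series, analogous to cls.select_mask(...).
mask = (df["x"] >= 3) & (df["x"] <= 5)

# to_dask_array() yields an array with unknown (nan) chunk sizes;
# compute_chunk_sizes() resolves them so the shifted slices below are valid.
mask = mask.to_dask_array().compute_chunk_sizes()

# XOR flags the positions where the mask flips; OR-ing those flags back in
# switches on one extra point on each side of every True run.
extra = mask[1:] ^ mask[:-1]
mask[1:] |= extra
mask[:-1] |= extra

print(mask.compute())
# [False False  True  True  True  True  True False False False]
```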
12 changes: 12 additions & 0 deletions holoviews/core/data/ibis.py
@@ -427,6 +427,18 @@ def select_mask(cls, dataset, selection):
predicates.append(column == object)
return predicates

@classmethod
def _select_mask_neighbor(cls, dataset, selection):
"""Runs select mask and expand the True values to include its neighbors

Example

select_mask = [False, False, True, True, False, False]
select_mask_neighbor = [False, True, True, True, True, False]

"""
raise NotImplementedError

@classmethod
def sample(cls, dataset, samples=None):
import ibis
15 changes: 15 additions & 0 deletions holoviews/core/data/interface.py
@@ -377,6 +377,21 @@ def select_mask(cls, dataset, selection):
mask &= index_mask
return mask

@classmethod
def _select_mask_neighbor(cls, dataset, selection):
"""Runs select mask and expand the True values to include its neighbors

Example

select_mask = [False, False, True, True, False, False]
select_mask_neighbor = [False, True, True, True, True, False]

"""
mask = cls.select_mask(dataset, selection)
extra = mask[1:] ^ mask[:-1]
mask[1:] |= extra
mask[:-1] |= extra
return mask

@classmethod
def indexed(cls, dataset, selection):
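For reference, the pure-NumPy neighbor expansion added to the base `Interface` can be exercised on its own; this sketch reproduces the docstring example, and the helper name `expand_mask_to_neighbors` is ours rather than part of the PR.

```python
import numpy as np

def expand_mask_to_neighbors(mask):
    # Consecutive entries that differ mark the edges of each True run.
    extra = mask[1:] ^ mask[:-1]
    # OR-ing the edges back in grows every True run by one point on each
    # side, so the first sample just outside the viewport is kept as well.
    mask = mask.copy()
    mask[1:] |= extra
    mask[:-1] |= extra
    return mask

select_mask = np.array([False, False, True, True, False, False])
print(expand_mask_to_neighbors(select_mask))
# [False  True  True  True  True False]
```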
42 changes: 37 additions & 5 deletions holoviews/operation/downsample.py
@@ -152,10 +152,13 @@ def _nth_point(x, y, n_out, **kwargs):
y (np.ndarray): The y-values of the data.
n_out (int): The number of output points.
Returns:
np.array: The indexes of the selected datapoints.
slice: The slice of selected datapoints.
"""
n_samples = len(x)
return np.arange(0, n_samples, max(1, math.ceil(n_samples / n_out)))
return slice(0, n_samples, max(1, math.ceil(n_samples / n_out)))

def _viewport(x, y, n_out, **kwargs):
return slice(len(x))

def _min_max(x, y, n_out, **kwargs):
try:
@@ -191,6 +194,7 @@ def _m4(x, y, n_out, **kwargs):
_ALGORITHMS = {
'lttb': _lttb,
'nth': _nth_point,
'viewport': _viewport,
'minmax': _min_max,
'minmax-lttb': _min_max_lttb,
'm4': _m4,
@@ -207,13 +211,14 @@ class downsample1d(ResampleOperation1D):
algorithm = param.Selector(default='lttb', objects=list(_ALGORITHMS), doc="""
The algorithm to use for downsampling:

- `lttb`: Largest Triangle Three Buckets downsample algorithm
- `lttb`: Largest Triangle Three Buckets downsample algorithm.
- `nth`: Selects every n-th point.
- `viewport`: Selects all points in a given viewport.
- `minmax`: Selects the min and max value in each bin (requires tsdownsampler).
- `m4`: Selects the min, max, first and last value in each bin (requires tsdownsampler).
- `minmax-lttb`: First selects n_out * minmax_ratio min and max values,
then further reduces these to n_out values using the
Largest Triangle Three Buckets algorithm. (requires tsdownsampler)""")
Largest Triangle Three Buckets algorithm (requires tsdownsampler).""")

parallel = param.Boolean(default=False, doc="""
The number of threads to use (if tsdownsampler is available).""")
@@ -223,6 +228,10 @@ class downsample1d(ResampleOperation1D):
values to generate with the minmax algorithm before further
downsampling with LTTB.""")

neighbor_points = param.Boolean(default=None, doc="""
Whether to add the neighbor points to the range before downsampling.
By default this is only enabled for the viewport algorithm.""")

def _process(self, element, key=None):
if isinstance(element, (Overlay, NdOverlay)):
_process = partial(self._process, key=key)
@@ -233,7 +242,8 @@ def _process(self, element, key=None):
return element.clone(elements)

if self.p.x_range:
element = element[slice(*self.p.x_range)]
mask = self._compute_mask(element)
element = element[mask]
if len(element) <= self.p.width:
return element
xs, ys = (element.dimension_values(i) for i in range(2))
@@ -249,3 +259,25 @@ def _process(self, element, key=None):
kwargs['minmax_ratio'] = self.p.minmax_ratio
samples = downsample(xs, ys, self.p.width, parallel=self.p.parallel, **kwargs)
return element.iloc[samples]

def _compute_mask(self, element):
"""
Computes the mask to apply to the element before downsampling.
"""
neighbor_enabled = (
self.p.neighbor_points
if self.p.neighbor_points is not None
else self.p.algorithm == "viewport"
)
if not neighbor_enabled:
return slice(*self.p.x_range)
try:
mask = element.dataset.interface._select_mask_neighbor(
element.dataset, {element.kdims[0]: self.p.x_range}
)
except NotImplementedError:
mask = slice(*self.p.x_range)
except Exception as e:
self.param.warning(f"Could not apply neighbor mask to downsample1d: {e}")
mask = slice(*self.p.x_range)
return mask
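The changes above add the `viewport` algorithm and the `neighbor_points` option to `downsample1d`. A hedged usage sketch follows; the data and parameter values are invented, and defaults such as `width` come from `ResampleOperation1D`.

```python
import numpy as np
import holoviews as hv
from holoviews.operation.downsample import downsample1d

hv.extension("bokeh")

x = np.linspace(0, 500, 1_000_000)
curve = hv.Curve((x, np.sin(x) + np.random.normal(scale=0.1, size=x.size)))

# 'viewport' keeps every sample whose x-value lies in the current x_range
# (plus one neighboring sample on each side via _select_mask_neighbor),
# rather than thinning the data down to `width` points.
viewport_dmap = downsample1d(curve, algorithm="viewport")

# The neighbor expansion can also be enabled explicitly for other algorithms.
lttb_dmap = downsample1d(curve, algorithm="lttb", width=1000, neighbor_points=True)
```

Both calls are expected to return a DynamicMap driven by the RangeX/RangeY streams, so the mask computed in `_compute_mask` is refreshed whenever the viewport changes.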
7 changes: 7 additions & 0 deletions holoviews/plotting/bokeh/callbacks.py
@@ -404,6 +404,7 @@ def set_callback(self, handle):
if self.on_changes:
change_handler = lambda attr, old, new: (
asyncio.create_task(self.on_change(attr, old, new))
if self.plot.document else None
)
for change in self.on_changes:
if change in ['patching', 'streaming']:
@@ -645,6 +646,12 @@ class RangeXYCallback(Callback):
'y1': 'cb_obj.y1',
}

def initialize(self, plot_id=None):
super().initialize(plot_id)
for stream in self.streams:
msg = self._process_msg({})
stream.update(**msg)

def _process_msg(self, msg):
if self.plot.state.x_range is not self.plot.handles['x_range']:
x_range = self.plot.handles['x_range']
3 changes: 0 additions & 3 deletions holoviews/plotting/bokeh/plot.py
@@ -173,9 +173,6 @@ def _update_datasource(self, source, data):
"""
Update datasource with data for a new frame.
"""
if not self.document:
return

data = self._postprocess_data(data)
empty = all(len(v) == 0 for v in data.values())
if (self.streaming and self.streaming[0].data is self.current_frame.data
8 changes: 8 additions & 0 deletions holoviews/streams.py
@@ -1480,6 +1480,10 @@ class RangeX(LinkedStream):
x_range = param.Tuple(default=None, length=2, constant=True, doc="""
Range of the x-axis of a plot in data coordinates""")

def _set_stream_parameters(self, **kwargs):
kwargs.pop("y_range", None)
super()._set_stream_parameters(**kwargs)


class RangeY(LinkedStream):
"""
@@ -1489,6 +1493,10 @@ class RangeY(LinkedStream):
y_range = param.Tuple(default=None, length=2, constant=True, doc="""
Range of the y-axis of a plot in data coordinates""")

def _set_stream_parameters(self, **kwargs):
kwargs.pop("x_range", None)
super()._set_stream_parameters(**kwargs)


class BoundsXY(LinkedStream):
"""
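A small illustration (ours, not part of the PR) of what the two `_set_stream_parameters` overrides achieve: a combined RangeXY-style message can be pushed to a RangeX or RangeY stream, and the irrelevant axis is silently dropped instead of raising.

```python
from holoviews.streams import RangeX, RangeY

rx, ry = RangeX(), RangeY()

# A message carrying both axes, as the RangeXYCallback may now send during
# initialization; RangeX pops y_range and RangeY pops x_range before the
# remaining parameters are set.
msg = {"x_range": (0.0, 1.0), "y_range": (-2.0, 2.0)}
rx.update(**msg)
ry.update(**msg)

print(rx.x_range)  # (0.0, 1.0)
print(ry.y_range)  # (-2.0, 2.0)
```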
6 changes: 6 additions & 0 deletions holoviews/tests/core/data/base.py
@@ -858,6 +858,12 @@ def test_dataset_transform_add_ht(self):
kdims=self.kdims, vdims=self.vdims+['combined'])
self.assertEqual(transformed, expected)

def test_select_with_neighbor(self):
select = self.table.interface.select_mask(self.table.dataset, {"Weight": 18})
select_neighbor = self.table.interface._select_mask_neighbor(self.table.dataset, dict(Weight=18))

np.testing.assert_almost_equal(select, [False, True, False])
np.testing.assert_almost_equal(select_neighbor, [True, True, True])


class ScalarColumnTests:
7 changes: 7 additions & 0 deletions holoviews/tests/core/data/test_cudfinterface.py
@@ -104,3 +104,10 @@ def test_dataset_groupby_second_dim(self):

def test_dataset_aggregate_string_types_size(self):
raise SkipTest("cuDF does not support variance aggregation")

def test_select_with_neighbor(self):
try:
# Not currently supported by CuDF
super().test_select_with_neighbor()
except NotImplementedError:
raise SkipTest("Not supported")
7 changes: 7 additions & 0 deletions holoviews/tests/core/data/test_ibisinterface.py
@@ -256,6 +256,13 @@ def test_aggregation_operations(self):

self.compare_dataset(expected, result, msg=str(agg))

def test_select_with_neighbor(self):
try:
# Not currently supported by Ibis
super().test_select_with_neighbor()
except NotImplementedError:
raise SkipTest("Not supported")

if not IbisInterface.has_rowid():

def test_dataset_iloc_slice_rows_slice_cols(self):
10 changes: 8 additions & 2 deletions holoviews/tests/operation/test_downsample.py
@@ -9,6 +9,8 @@
except ImportError:
tsdownsample = None

algorithms = _ALGORITHMS.copy()
algorithms.pop('viewport', None)  # viewport returns slice(len(data)) regardless of the width

@pytest.mark.parametrize("plottype", ["overlay", "ndoverlay"])
def test_downsample1d_multi(plottype):
@@ -26,7 +28,7 @@ def test_downsample1d_multi(plottype):
assert value.size == downsample1d.width


@pytest.mark.parametrize("algorithm", _ALGORITHMS.values(), ids=_ALGORITHMS)
@pytest.mark.parametrize("algorithm", algorithms.values(), ids=algorithms)
def test_downsample_algorithm(algorithm, unimport):
unimport("tsdownsample")
x = np.arange(1000)
@@ -37,14 +39,18 @@ def test_downsample_algorithm(algorithm, unimport):
except NotImplementedError:
pytest.skip("not testing tsdownsample algorithms")
else:
if isinstance(result, slice):
result = x[result]
assert result.size == width


@pytest.mark.skipif(not tsdownsample, reason="tsdownsample not installed")
@pytest.mark.parametrize("algorithm", _ALGORITHMS.values(), ids=_ALGORITHMS)
@pytest.mark.parametrize("algorithm", algorithms.values(), ids=algorithms)
def test_downsample_algorithm_with_tsdownsample(algorithm):
x = np.arange(1000)
y = np.random.rand(1000)
width = 20
result = algorithm(x, y, width)
if isinstance(result, slice):
result = x[result]
assert result.size == width