From 8ea96dd24f20dea0a1632adf491221b861ce97e9 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 7 Nov 2017 19:11:38 +0000 Subject: [PATCH 1/4] Fixes and improvements for kde operation error, dimension and range handling --- holoviews/operation/stats.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py index 60e84c04ba..76d9ab4ef7 100644 --- a/holoviews/operation/stats.py +++ b/holoviews/operation/stats.py @@ -85,18 +85,25 @@ def _process(self, element, key=None): if self.p.dimension: selected_dim = element.get_dimension(self.p.dimension) else: - selected_dim = [d.name for d in element.vdims + element.kdims][0] + dimensions = element.vdims+element.kdims + if not dimensions: + raise ValueError("%s element does not declare any dimensions " + "to compute the kernel density estimate on." % + type(element).__name__) + selected_dim = dimensions[0] vdim_name = '{}_density'.format(selected_dim.name) vdim_label = '{} Density'.format(selected_dim.label) - vdims = [Dimension(vdim_nam, label=vdim_label)] + vdims = [Dimension(vdim_name, label=vdim_label)] data = element.dimension_values(selected_dim) bin_range = self.p.bin_range or element.range(selected_dim) if bin_range == (0, 0) or any(not np.isfinite(r) for r in bin_range): bin_range = (0, 1) + elif bin_range[0] == bin_range[1]: + bin_range = (bin_range[0]-0.5, bin_range[1]+0.5) data = data[np.isfinite(data)] - if len(data): + if len(data) > 1: kde = stats.gaussian_kde(data) if self.p.bandwidth: kde.set_bandwidth(self.p.bandwidth) @@ -159,6 +166,9 @@ def _process(self, element, key=None): except ImportError: raise ImportError('%s operation requires SciPy to be installed.' 
% type(self).__name__) + if len(element.dimensions()) < 2: + raise ValueError("bivariate_kde can only be computed on elements " + "declaring at least two dimensions.") xdim, ydim = element.dimensions()[:2] params = {} if isinstance(element, Bivariate): @@ -174,8 +184,13 @@ def _process(self, element, key=None): ymin, ymax = self.p.y_range or element.range(1) if any(not np.isfinite(v) for v in (xmin, xmax)): xmin, xmax = -0.5, 0.5 + elif xmin == xmax: + xmin, xmax = xmin-0.5, xmax+0.5 if any(not np.isfinite(v) for v in (ymin, ymax)): ymin, ymax = -0.5, 0.5 + elif ymin == ymax: + ymin, ymax = ymin-0.5, ymax+0.5 + if len(element) > 1: kde = stats.gaussian_kde(data) if self.p.bandwidth: From e599b99d31022ee9d32602c5ea3fd0a5fdb592e3 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Thu, 9 Nov 2017 18:09:50 +0000 Subject: [PATCH 2/4] Added handling of nans for bivariate element --- holoviews/operation/stats.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py index 76d9ab4ef7..6c5f713955 100644 --- a/holoviews/operation/stats.py +++ b/holoviews/operation/stats.py @@ -191,7 +191,8 @@ def _process(self, element, key=None): elif ymin == ymax: ymin, ymax = ymin-0.5, ymax+0.5 - if len(element) > 1: + data = data[:, np.isfinite(data).min(axis=0)] + if len(data) > 1: kde = stats.gaussian_kde(data) if self.p.bandwidth: kde.set_bandwidth(self.p.bandwidth) From 672cbab2080d6ef4f6b7291272f71dd69a09fd76 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 11 Nov 2017 13:52:45 +0000 Subject: [PATCH 3/4] Fixes for stats operation explicit range handling --- holoviews/operation/stats.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py index 6c5f713955..14188cc1ef 100644 --- a/holoviews/operation/stats.py +++ b/holoviews/operation/stats.py @@ -108,7 +108,10 @@ def _process(self, element, key=None): if 
self.p.bandwidth: kde.set_bandwidth(self.p.bandwidth) bw = kde.scotts_factor() * data.std(ddof=1) - xs = _kde_support(bin_range, bw, self.p.n_samples, self.p.cut, selected_dim.range) + if self.p.bin_range: + xs = np.linspace(bin_range[0], bin_range[1], self.p.n_samples) + else: + xs = _kde_support(bin_range, bw, self.p.n_samples, self.p.cut, selected_dim.range) ys = kde.evaluate(xs) else: xs = np.linspace(bin_range[0], bin_range[1], self.p.n_samples) @@ -192,13 +195,19 @@ def _process(self, element, key=None): ymin, ymax = ymin-0.5, ymax+0.5 data = data[:, np.isfinite(data).min(axis=0)] - if len(data) > 1: + if data.shape[1] > 1: kde = stats.gaussian_kde(data) if self.p.bandwidth: kde.set_bandwidth(self.p.bandwidth) bw = kde.scotts_factor() * data.std(ddof=1) - xs = _kde_support((xmin, xmax), bw, self.p.n_samples, self.p.cut, xdim.range) - ys = _kde_support((ymin, ymax), bw, self.p.n_samples, self.p.cut, ydim.range) + if self.p.x_range: + xs = np.linspace(xmin, xmax, self.p.n_samples) + else: + xs = _kde_support((xmin, xmax), bw, self.p.n_samples, self.p.cut, xdim.range) + if self.p.y_range: + ys = np.linspace(ymin, ymax, self.p.n_samples) + else: + ys = _kde_support((ymin, ymax), bw, self.p.n_samples, self.p.cut, ydim.range) xx, yy = cartesian_product([xs, ys], False) positions = np.vstack([xx.ravel(), yy.ravel()]) f = np.reshape(kde(positions).T, xx.shape) From fac157ef71485d3c35f557395133fac2fb7ae548 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 11 Nov 2017 13:53:07 +0000 Subject: [PATCH 4/4] Added tests for stats operations --- tests/teststatsoperations.py | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 tests/teststatsoperations.py diff --git a/tests/teststatsoperations.py b/tests/teststatsoperations.py new file mode 100644 index 0000000000..f98f3cfdff --- /dev/null +++ b/tests/teststatsoperations.py @@ -0,0 +1,56 @@ +from unittest import SkipTest +from nose.plugins.attrib import attr + +try: + import 
scipy +except: + raise SkipTest('SciPy not available') + +import numpy as np + +from holoviews import Distribution, Bivariate, Dataset, Area, Image +from holoviews.element.comparison import ComparisonTestCase +from holoviews.operation.stats import (univariate_kde, bivariate_kde) + + +class KDEOperationTests(ComparisonTestCase): + """ + Tests for the kernel density estimate (KDE) statistics operations, + univariate_kde and bivariate_kde, including all-NaN input handling. + """ + + def setUp(self): + self.values = np.arange(4) + self.dist = Distribution(self.values) + self.nans = np.full(5, np.NaN) + self.values2d = [(i, i/10) for i in np.linspace(0, 4, 10)] + self.bivariate = Bivariate(self.values2d) + self.dist_nans = Distribution(self.nans) + self.bivariate_nans = Bivariate(np.column_stack([self.nans, self.nans])) + + def test_univariate_kde(self): + kde = univariate_kde(self.dist, n_samples=5, bin_range=(0, 4)) + xs = np.arange(5) + ys = [0.17594505, 0.23548218, 0.23548218, 0.17594505, 0.0740306] + area = Area((xs, ys), 'Value', ('Value_density', 'Value Density')) + self.assertEqual(kde, area) + + def test_univariate_kde_nans(self): + kde = univariate_kde(self.dist_nans, n_samples=5, bin_range=(0, 4)) + xs = np.arange(5) + ys = [0, 0, 0, 0, 0] + area = Area((xs, ys), 'Value', ('Value_density', 'Value Density')) + self.assertEqual(kde, area) + + def test_bivariate_kde(self): + kde = bivariate_kde(self.bivariate, n_samples=2, x_range=(0, 4), + y_range=(0, 4), contours=False) + img = Image(np.array([[0, 0], [27711861.782675, 0]]), + bounds=(-2, -2, 6, 6), vdims=['Density']) + self.assertEqual(kde, img) + + def test_bivariate_kde_nans(self): + kde = bivariate_kde(self.bivariate_nans, n_samples=2, x_range=(0, 4), + y_range=(0, 4), contours=False) + img = Image(np.zeros((2, 2)), bounds=(-2, -2, 6, 6), vdims=['Density']) + self.assertEqual(kde, img)