From 8ea96dd24f20dea0a1632adf491221b861ce97e9 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Tue, 7 Nov 2017 19:11:38 +0000
Subject: [PATCH 1/4] Fixes and improvements for kde operation error, dimension
and range handling
---
holoviews/operation/stats.py | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py
index 60e84c04ba..76d9ab4ef7 100644
--- a/holoviews/operation/stats.py
+++ b/holoviews/operation/stats.py
@@ -85,18 +85,25 @@ def _process(self, element, key=None):
if self.p.dimension:
selected_dim = element.get_dimension(self.p.dimension)
else:
- selected_dim = [d.name for d in element.vdims + element.kdims][0]
+ dimensions = element.vdims+element.kdims
+ if not dimensions:
+ raise ValueError("%s element does not declare any dimensions "
+ "to compute the kernel density estimate on." %
+ type(element).__name__)
+ selected_dim = dimensions[0]
vdim_name = '{}_density'.format(selected_dim.name)
vdim_label = '{} Density'.format(selected_dim.label)
- vdims = [Dimension(vdim_nam, label=vdim_label)]
+ vdims = [Dimension(vdim_name, label=vdim_label)]
data = element.dimension_values(selected_dim)
bin_range = self.p.bin_range or element.range(selected_dim)
if bin_range == (0, 0) or any(not np.isfinite(r) for r in bin_range):
bin_range = (0, 1)
+ elif bin_range[0] == bin_range[1]:
+ bin_range = (bin_range[0]-0.5, bin_range[1]+0.5)
data = data[np.isfinite(data)]
- if len(data):
+ if len(data) > 1:
kde = stats.gaussian_kde(data)
if self.p.bandwidth:
kde.set_bandwidth(self.p.bandwidth)
@@ -159,6 +166,9 @@ def _process(self, element, key=None):
except ImportError:
raise ImportError('%s operation requires SciPy to be installed.' % type(self).__name__)
+ if len(element.dimensions()) < 2:
+ raise ValueError("bivariate_kde can only be computed on elements "
+ "declaring at least two dimensions.")
xdim, ydim = element.dimensions()[:2]
params = {}
if isinstance(element, Bivariate):
@@ -174,8 +184,13 @@ def _process(self, element, key=None):
ymin, ymax = self.p.y_range or element.range(1)
if any(not np.isfinite(v) for v in (xmin, xmax)):
xmin, xmax = -0.5, 0.5
+ elif xmin == xmax:
+ xmin, xmax = xmin-0.5, xmax+0.5
if any(not np.isfinite(v) for v in (ymin, ymax)):
ymin, ymax = -0.5, 0.5
+ elif ymin == ymax:
+ ymin, ymax = ymin-0.5, ymax+0.5
+
if len(element) > 1:
kde = stats.gaussian_kde(data)
if self.p.bandwidth:
From e599b99d31022ee9d32602c5ea3fd0a5fdb592e3 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Thu, 9 Nov 2017 18:09:50 +0000
Subject: [PATCH 2/4] Added handling of nans for bivariate element
---
holoviews/operation/stats.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py
index 76d9ab4ef7..6c5f713955 100644
--- a/holoviews/operation/stats.py
+++ b/holoviews/operation/stats.py
@@ -191,7 +191,8 @@ def _process(self, element, key=None):
elif ymin == ymax:
ymin, ymax = ymin-0.5, ymax+0.5
- if len(element) > 1:
+ data = data[:, np.isfinite(data).min(axis=0)]
+ if len(data) > 1:
kde = stats.gaussian_kde(data)
if self.p.bandwidth:
kde.set_bandwidth(self.p.bandwidth)
From 672cbab2080d6ef4f6b7291272f71dd69a09fd76 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sat, 11 Nov 2017 13:52:45 +0000
Subject: [PATCH 3/4] Fixes for stats operation explicit range handling
---
holoviews/operation/stats.py | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py
index 6c5f713955..14188cc1ef 100644
--- a/holoviews/operation/stats.py
+++ b/holoviews/operation/stats.py
@@ -108,7 +108,10 @@ def _process(self, element, key=None):
if self.p.bandwidth:
kde.set_bandwidth(self.p.bandwidth)
bw = kde.scotts_factor() * data.std(ddof=1)
- xs = _kde_support(bin_range, bw, self.p.n_samples, self.p.cut, selected_dim.range)
+ if self.p.bin_range:
+ xs = np.linspace(bin_range[0], bin_range[1], self.p.n_samples)
+ else:
+ xs = _kde_support(bin_range, bw, self.p.n_samples, self.p.cut, selected_dim.range)
ys = kde.evaluate(xs)
else:
xs = np.linspace(bin_range[0], bin_range[1], self.p.n_samples)
@@ -192,13 +195,19 @@ def _process(self, element, key=None):
ymin, ymax = ymin-0.5, ymax+0.5
data = data[:, np.isfinite(data).min(axis=0)]
- if len(data) > 1:
+ if data.shape[1] > 1:
kde = stats.gaussian_kde(data)
if self.p.bandwidth:
kde.set_bandwidth(self.p.bandwidth)
bw = kde.scotts_factor() * data.std(ddof=1)
- xs = _kde_support((xmin, xmax), bw, self.p.n_samples, self.p.cut, xdim.range)
- ys = _kde_support((ymin, ymax), bw, self.p.n_samples, self.p.cut, ydim.range)
+ if self.p.x_range:
+ xs = np.linspace(xmin, xmax, self.p.n_samples)
+ else:
+ xs = _kde_support((xmin, xmax), bw, self.p.n_samples, self.p.cut, xdim.range)
+ if self.p.y_range:
+ ys = np.linspace(ymin, ymax, self.p.n_samples)
+ else:
+ ys = _kde_support((ymin, ymax), bw, self.p.n_samples, self.p.cut, ydim.range)
xx, yy = cartesian_product([xs, ys], False)
positions = np.vstack([xx.ravel(), yy.ravel()])
f = np.reshape(kde(positions).T, xx.shape)
From fac157ef71485d3c35f557395133fac2fb7ae548 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sat, 11 Nov 2017 13:53:07 +0000
Subject: [PATCH 4/4] Added tests for stats operations
---
tests/teststatsoperations.py | 56 ++++++++++++++++++++++++++++++++++++
1 file changed, 56 insertions(+)
create mode 100644 tests/teststatsoperations.py
diff --git a/tests/teststatsoperations.py b/tests/teststatsoperations.py
new file mode 100644
index 0000000000..f98f3cfdff
--- /dev/null
+++ b/tests/teststatsoperations.py
@@ -0,0 +1,56 @@
+from unittest import SkipTest
+from nose.plugins.attrib import attr
+
+try:
+    import scipy  # imported only to check availability; the operations need it
+except ImportError:  # only a missing SciPy should skip; other errors must surface
+    raise SkipTest('SciPy not available')
+
+import numpy as np
+
+from holoviews import Distribution, Bivariate, Dataset, Area, Image
+from holoviews.element.comparison import ComparisonTestCase
+from holoviews.operation.stats import (univariate_kde, bivariate_kde)
+
+
+class KDEOperationTests(ComparisonTestCase):
+    """
+    Tests for the univariate_kde and bivariate_kde stats operations,
+    including inputs that consist entirely of NaN values.
+    """
+
+    def setUp(self):
+        self.values = np.arange(4)
+        self.dist = Distribution(self.values)
+        self.nans = np.full(5, np.nan)  # no finite samples remain after filtering
+        self.values2d = [(i, i/10) for i in np.linspace(0, 4, 10)]
+        self.bivariate = Bivariate(self.values2d)
+        self.dist_nans = Distribution(self.nans)
+        self.bivariate_nans = Bivariate(np.column_stack([self.nans, self.nans]))
+
+    def test_univariate_kde(self):
+        kde = univariate_kde(self.dist, n_samples=5, bin_range=(0, 4))
+        xs = np.arange(5)  # explicit bin_range: samples span exactly 0..4
+        ys = [0.17594505, 0.23548218, 0.23548218, 0.17594505, 0.0740306]
+        area = Area((xs, ys), 'Value', ('Value_density', 'Value Density'))
+        self.assertEqual(kde, area)
+
+    def test_univariate_kde_nans(self):
+        kde = univariate_kde(self.dist_nans, n_samples=5, bin_range=(0, 4))
+        xs = np.arange(5)
+        ys = [0, 0, 0, 0, 0]  # all-NaN data is expected to give zero density
+        area = Area((xs, ys), 'Value', ('Value_density', 'Value Density'))
+        self.assertEqual(kde, area)
+
+    def test_bivariate_kde(self):
+        kde = bivariate_kde(self.bivariate, n_samples=2, x_range=(0, 4),
+                            y_range=(0, 4), contours=False)
+        img = Image(np.array([[0, 0], [27711861.782675, 0]]),
+                    bounds=(-2, -2, 6, 6), vdims=['Density'])
+        self.assertEqual(kde, img)
+
+    def test_bivariate_kde_nans(self):
+        kde = bivariate_kde(self.bivariate_nans, n_samples=2, x_range=(0, 4),
+                            y_range=(0, 4), contours=False)
+        img = Image(np.zeros((2, 2)), bounds=(-2, -2, 6, 6), vdims=['Density'])
+        self.assertEqual(kde, img)