Skip to content

Commit

Permalink
Improvements for kde operation error, dimension and range handling (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
philippjfr authored Nov 11, 2017
1 parent d77cee4 commit c17bc42
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 7 deletions.
39 changes: 32 additions & 7 deletions holoviews/operation/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,23 +85,33 @@ def _process(self, element, key=None):
if self.p.dimension:
selected_dim = element.get_dimension(self.p.dimension)
else:
selected_dim = [d.name for d in element.vdims + element.kdims][0]
dimensions = element.vdims+element.kdims
if not dimensions:
raise ValueError("%s element does not declare any dimensions "
"to compute the kernel density estimate on." %
type(element).__name__)
selected_dim = dimensions[0]
vdim_name = '{}_density'.format(selected_dim.name)
vdim_label = '{} Density'.format(selected_dim.label)
vdims = [Dimension(vdim_nam, label=vdim_label)]
vdims = [Dimension(vdim_name, label=vdim_label)]

data = element.dimension_values(selected_dim)
bin_range = self.p.bin_range or element.range(selected_dim)
if bin_range == (0, 0) or any(not np.isfinite(r) for r in bin_range):
bin_range = (0, 1)
elif bin_range[0] == bin_range[1]:
bin_range = (bin_range[0]-0.5, bin_range[1]+0.5)

data = data[np.isfinite(data)]
if len(data):
if len(data) > 1:
kde = stats.gaussian_kde(data)
if self.p.bandwidth:
kde.set_bandwidth(self.p.bandwidth)
bw = kde.scotts_factor() * data.std(ddof=1)
xs = _kde_support(bin_range, bw, self.p.n_samples, self.p.cut, selected_dim.range)
if self.p.bin_range:
xs = np.linspace(bin_range[0], bin_range[1], self.p.n_samples)
else:
xs = _kde_support(bin_range, bw, self.p.n_samples, self.p.cut, selected_dim.range)
ys = kde.evaluate(xs)
else:
xs = np.linspace(bin_range[0], bin_range[1], self.p.n_samples)
Expand Down Expand Up @@ -159,6 +169,9 @@ def _process(self, element, key=None):
except ImportError:
raise ImportError('%s operation requires SciPy to be installed.' % type(self).__name__)

if len(element.dimensions()) < 2:
raise ValueError("bivariate_kde can only be computed on elements "
"declaring at least two dimensions.")
xdim, ydim = element.dimensions()[:2]
params = {}
if isinstance(element, Bivariate):
Expand All @@ -174,15 +187,27 @@ def _process(self, element, key=None):
ymin, ymax = self.p.y_range or element.range(1)
if any(not np.isfinite(v) for v in (xmin, xmax)):
xmin, xmax = -0.5, 0.5
elif xmin == xmax:
xmin, xmax = xmin-0.5, xmax+0.5
if any(not np.isfinite(v) for v in (ymin, ymax)):
ymin, ymax = -0.5, 0.5
if len(element) > 1:
elif ymin == ymax:
ymin, ymax = ymin-0.5, ymax+0.5

data = data[:, np.isfinite(data).min(axis=0)]
if data.shape[1] > 1:
kde = stats.gaussian_kde(data)
if self.p.bandwidth:
kde.set_bandwidth(self.p.bandwidth)
bw = kde.scotts_factor() * data.std(ddof=1)
xs = _kde_support((xmin, xmax), bw, self.p.n_samples, self.p.cut, xdim.range)
ys = _kde_support((ymin, ymax), bw, self.p.n_samples, self.p.cut, ydim.range)
if self.p.x_range:
xs = np.linspace(xmin, xmax, self.p.n_samples)
else:
xs = _kde_support((xmin, xmax), bw, self.p.n_samples, self.p.cut, xdim.range)
if self.p.y_range:
ys = np.linspace(ymin, ymax, self.p.n_samples)
else:
ys = _kde_support((ymin, ymax), bw, self.p.n_samples, self.p.cut, ydim.range)
xx, yy = cartesian_product([xs, ys], False)
positions = np.vstack([xx.ravel(), yy.ravel()])
f = np.reshape(kde(positions).T, xx.shape)
Expand Down
56 changes: 56 additions & 0 deletions tests/teststatsoperations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from unittest import SkipTest
from nose.plugins.attrib import attr

# SciPy is an optional dependency of the KDE operations; when it cannot be
# imported the whole test module is skipped instead of failing. Only
# ImportError is caught so unrelated errors (or KeyboardInterrupt/SystemExit)
# are not misreported as "SciPy not available".
try:
    import scipy
except ImportError:
    raise SkipTest('SciPy not available')

import numpy as np

from holoviews import Distribution, Bivariate, Dataset, Area, Image
from holoviews.element.comparison import ComparisonTestCase
from holoviews.operation.stats import (univariate_kde, bivariate_kde)


class KDEOperationTests(ComparisonTestCase):
    """
    Tests for the kernel density estimate operations univariate_kde
    and bivariate_kde, covering both finite data and data consisting
    entirely of NaN values.
    """

    def setUp(self):
        """Create finite and all-NaN fixtures for the 1D and 2D operations."""
        self.values = np.arange(4)
        self.dist = Distribution(self.values)
        # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
        self.nans = np.full(5, np.nan)
        self.values2d = [(i, i/10) for i in np.linspace(0, 4, 10)]
        self.bivariate = Bivariate(self.values2d)
        self.dist_nans = Distribution(self.nans)
        self.bivariate_nans = Bivariate(np.column_stack([self.nans, self.nans]))

    def test_univariate_kde(self):
        """univariate_kde with an explicit bin_range yields the expected Area."""
        kde = univariate_kde(self.dist, n_samples=5, bin_range=(0, 4))
        xs = np.arange(5)
        ys = [0.17594505, 0.23548218, 0.23548218, 0.17594505, 0.0740306]
        area = Area((xs, ys), 'Value', ('Value_density', 'Value Density'))
        self.assertEqual(kde, area)

    def test_univariate_kde_nans(self):
        """univariate_kde on all-NaN data yields an Area of zero density."""
        kde = univariate_kde(self.dist_nans, n_samples=5, bin_range=(0, 4))
        xs = np.arange(5)
        ys = [0, 0, 0, 0, 0]
        area = Area((xs, ys), 'Value', ('Value_density', 'Value Density'))
        self.assertEqual(kde, area)

    def test_bivariate_kde(self):
        """bivariate_kde with explicit ranges and contours=False yields the expected Image."""
        kde = bivariate_kde(self.bivariate, n_samples=2, x_range=(0, 4),
                            y_range=(0, 4), contours=False)
        img = Image(np.array([[0, 0], [27711861.782675, 0]]),
                    bounds=(-2, -2, 6, 6), vdims=['Density'])
        self.assertEqual(kde, img)

    def test_bivariate_kde_nans(self):
        """bivariate_kde on all-NaN data yields an Image of zeros."""
        kde = bivariate_kde(self.bivariate_nans, n_samples=2, x_range=(0, 4),
                            y_range=(0, 4), contours=False)
        img = Image(np.zeros((2, 2)), bounds=(-2, -2, 6, 6), vdims=['Density'])
        self.assertEqual(kde, img)

0 comments on commit c17bc42

Please sign in to comment.