Skip to content

Commit

Permalink
Allow using dim expressions as accessors (#4311)
Browse files Browse the repository at this point in the history
  • Loading branch information
philippjfr authored Mar 23, 2020
1 parent fbbf409 commit b7fb3ec
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 35 deletions.
41 changes: 29 additions & 12 deletions holoviews/tests/util/testtransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def setUp(self):

# Assertion helpers

def check_apply(self, expr, expected, skip_dask=False):
def check_apply(self, expr, expected, skip_dask=False, skip_no_index=False):
if np.isscalar(expected):
# Pandas input
self.assertEqual(
Expand All @@ -69,10 +69,11 @@ def check_apply(self, expr, expected, skip_dask=False):

# Check using dataset backed by pandas DataFrame
# keep_index=False
np.testing.assert_equal(
expr.apply(self.dataset),
expected.values
)
if not skip_no_index:
np.testing.assert_equal(
expr.apply(self.dataset),
expected.values
)
# keep_index=True
pd.testing.assert_series_equal(
expr.apply(self.dataset, keep_index=True),
Expand All @@ -87,20 +88,22 @@ def check_apply(self, expr, expected, skip_dask=False):
expected_dask = dd.from_pandas(expected, npartitions=2)

# keep_index=False, compute=False
da.assert_eq(
expr.apply(self.dataset_dask, compute=False), expected_dask.values
)
if not skip_no_index:
da.assert_eq(
expr.apply(self.dataset_dask, compute=False), expected_dask.values
)
# keep_index=True, compute=False
dd.assert_eq(
expr.apply(self.dataset_dask, keep_index=True, compute=False),
expected_dask,
check_names=False
)
# keep_index=False, compute=True
np.testing.assert_equal(
expr.apply(self.dataset_dask, compute=True),
expected_dask.values.compute()
)
if not skip_no_index:
np.testing.assert_equal(
expr.apply(self.dataset_dask, compute=True),
expected_dask.values.compute()
)
# keep_index=True, compute=True
pd.testing.assert_series_equal(
expr.apply(self.dataset_dask, keep_index=True, compute=True),
Expand Down Expand Up @@ -226,6 +229,10 @@ def test_log10_transform(self):

# Custom functions

def test_str_astype(self):
    """Calling the ``str`` accessor directly should cast the values to strings."""
    cast_expr = dim('int').str()
    expected = self.linear_ints.astype(str)
    # Dask-backed checks are skipped for this expression.
    self.check_apply(cast_expr, expected, skip_dask=True)

def test_norm_transform(self):
expr = dim('int').norm()
self.check_apply(expr, (self.linear_ints-1)/9.)
Expand Down Expand Up @@ -284,6 +291,12 @@ def test_categorize_transform_dict_with_default(self):
# We don't skip dask because results are stable across partitions
self.check_apply(expr, expected)

# Check accessors

def test_str_pandas_accessor(self):
    """``dim(...).str.lower()`` should match ``.str.lower()`` on the fixture data."""
    lowered_expr = dim('categories').str.lower()
    expected = self.repeating.str.lower()
    # skip_no_index=True: only the keep_index=True comparisons are run.
    self.check_apply(lowered_expr, expected, skip_no_index=True)

# Numpy functions

def test_digitize(self):
Expand Down Expand Up @@ -332,6 +345,10 @@ def test_multi_operator_expression_repr(self):
self.assertEqual(repr(((dim('float')-2)*3)**2),
"((dim('float')-2)*3)**2")

def test_accessor_repr(self):
    """The repr of a chained accessor expression should read like its source."""
    year_expr = dim('date').dt.year
    self.assertEqual(repr(year_expr), "dim('date').dt.year")

# Applies method

def test_multi_dim_expression_applies(self):
Expand Down
88 changes: 65 additions & 23 deletions holoviews/util/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import operator
import sys

from functools import partial
from types import BuiltinFunctionType, BuiltinMethodType, FunctionType, MethodType

import numpy as np
Expand Down Expand Up @@ -163,7 +162,8 @@ def _python_isin(array, values):

function_types = (
BuiltinFunctionType, BuiltinMethodType, FunctionType,
MethodType, np.ufunc, iloc)
MethodType, np.ufunc, iloc
)


class dim(object):
Expand Down Expand Up @@ -194,7 +194,7 @@ class dim(object):
python_isin: 'isin',
astype: 'astype',
round_: 'round',
iloc: 'iloc'
iloc: 'iloc',
}

_numpy_funcs = {
Expand Down Expand Up @@ -271,10 +271,25 @@ def pipe(cls, func, *args, **kwargs):
def __hash__(self):
    """Hash the expression via its ``repr`` string."""
    expression_text = repr(self)
    return hash(expression_text)

def __call__(self, *args, **kwargs):
    """
    Turn the trailing accessor op of this expression into a call.

    The last op must be named by a string and carry an 'accessor'
    entry in its kwargs; otherwise a ValueError is raised. A trailing
    'str' accessor is replaced by an ``astype(str)`` op (any supplied
    arguments are ignored in that case); any other accessor is
    replaced by an op with the same name that uses the supplied
    ``args`` and ``kwargs``.

    Returns a clone of this expression with the last op replaced.
    """
    last_op = self.ops[-1] if self.ops else None
    is_accessor = (
        last_op is not None
        and isinstance(last_op['fn'], basestring)
        and 'accessor' in last_op['kwargs']
    )
    if not is_accessor:
        raise ValueError("Cannot use __call__ method on dim expression "
                         "which is not an accessor. Ensure that you only "
                         "call a dim expression, which was created by "
                         "accessing an attribute that does not exist "
                         "on an existing dim expression.")

    if last_op['fn'] == 'str':
        # dim(...).str() keeps its meaning as a cast to string.
        replacement = dict(last_op, fn=astype, args=(str,), kwargs={})
    else:
        # The replacement kwargs are exactly the caller's kwargs.
        replacement = dict(last_op, args=args, kwargs=kwargs)

    earlier_ops = self.ops[:-1]
    return self.clone(self.dimension, earlier_ops + [replacement])

def __getattr__(self, attr):
if attr in self.__dict__:
return self.__dict__[attr]
return partial(self.method, attr)
return dim(self, attr, accessor=True)

@property
def params(self):
Expand Down Expand Up @@ -428,13 +443,14 @@ def norm(self, limits=None):
kwargs = {'min': limits[0], 'max': limits[1]}
return dim(self, norm, **kwargs)

@property
def str(self):
"Casts values to strings."
return self.astype(str)
"Casts values to strings or provides str accessor."
return dim(self, 'str', accessor=True)

# Other methods

def applies(self, dataset):
def applies(self, dataset, strict=False):
"""
Determines whether the dim transform can be applied to the
Dataset, i.e. whether all referenced dimensions can be
Expand All @@ -445,9 +461,10 @@ def applies(self, dataset):
if isinstance(self.dimension, dim):
applies = self.dimension.applies(dataset)
else:
applies = dataset.get_dimension(self.dimension) is not None
lookup = self.dimension if strict else self.dimension.name
applies = dataset.get_dimension(lookup) is not None
if isinstance(dataset, Graph) and not applies:
applies = dataset.nodes.get_dimension(self.dimension) is not None
applies = dataset.nodes.get_dimension(lookup) is not None
for op in self.ops:
args = op.get('args')
if not args:
Expand Down Expand Up @@ -510,7 +527,14 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False,
if 'axis' not in kwargs and not isinstance(fn, np.ufunc):
kwargs['axis'] = None
fn = fn_name
fn_args = [] if isinstance(fn, basestring) else [data]

if isinstance(fn, basestring):
accessor = kwargs.pop('accessor', None)
fn_args = []
else:
accessor = False
fn_args = [data]

for arg in args:
if isinstance(arg, dim):
arg = arg.apply(
Expand Down Expand Up @@ -548,34 +572,44 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False,
elif isinstance(fn, basestring):
method = getattr(data, fn, None)
if method is None:
mtype = 'attribute' if accessor else 'method'
raise AttributeError(
"%r could not be applied to '%r', '%s' method "
"%r could not be applied to '%r', '%s' %s "
"does not exist on %s type."
% (self, dataset, fn, type(data).__name__)
% (self, dataset, fn, mtype, type(data).__name__)
)
try:
data = method(*args, **kwargs)
except Exception as e:
if 'axis' in kwargs:
kwargs.pop('axis')
if accessor:
data = method
else:
try:
data = method(*args, **kwargs)
else:
raise e
except Exception as e:
if 'axis' in kwargs:
kwargs.pop('axis')
data = method(*args, **kwargs)
else:
raise e
else:
data = fn(*args, **kwargs)
return data

def __repr__(self):
op_repr = "'%s'" % self.dimension
accessor = False
for i, o in enumerate(self.ops):
if i == 0:
prev = 'dim({repr}'
elif accessor:
prev = '{repr}'
else:
prev = '({repr}'
fn = o['fn']
ufunc = isinstance(fn, np.ufunc)
args = ', '.join([repr(r) for r in o['args']]) if o['args'] else ''
kwargs = sorted(o['kwargs'].items(), key=operator.itemgetter(0))
kwargs = o['kwargs']
prev_accessor = accessor
accessor = kwargs.pop('accessor', None)
kwargs = sorted(kwargs.items(), key=operator.itemgetter(0))
kwargs = '%s' % ', '.join(['%s=%r' % item for item in kwargs]) if kwargs else ''
if fn in self._binary_funcs:
fn_name = self._binary_funcs[o['fn']]
Expand All @@ -597,7 +631,11 @@ def __repr__(self):
fn_name = self._builtin_funcs[fn]
format_string = '{fn}'+prev
elif isinstance(fn, basestring):
format_string = prev+').{fn}('
if accessor:
sep = '' if op_repr.endswith(')') or prev_accessor else ')'
format_string = prev+sep+'.{fn}'
else:
format_string = prev+').{fn}('
elif fn in self._numpy_funcs:
fn_name = self._numpy_funcs[fn]
format_string = prev+').{fn}('
Expand All @@ -615,14 +653,18 @@ def __repr__(self):
if fn_name in dir(np):
format_string = '.'.join([self._namespaces['numpy'], format_string])
else:
format_string = 'dim(' + prev+', {fn}'
if args:
format_string = prev+', {fn}'
if accessor:
pass
elif args:
if not format_string.endswith('('):
format_string += ', '
format_string += '{args}'
if kwargs:
format_string += ', {kwargs}'
elif kwargs:
if not format_string.endswith('('):
format_string += ', '
format_string += '{kwargs}'
op_repr = format_string.format(fn=fn_name, repr=op_repr,
args=args, kwargs=kwargs)
Expand Down

0 comments on commit b7fb3ec

Please sign in to comment.