Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow using dim expressions as accessors #4311

Merged
merged 7 commits into from
Mar 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 29 additions & 12 deletions holoviews/tests/util/testtransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def setUp(self):

# Assertion helpers

def check_apply(self, expr, expected, skip_dask=False):
def check_apply(self, expr, expected, skip_dask=False, skip_no_index=False):
if np.isscalar(expected):
# Pandas input
self.assertEqual(
Expand All @@ -69,10 +69,11 @@ def check_apply(self, expr, expected, skip_dask=False):

# Check using dataset backed by pandas DataFrame
# keep_index=False
np.testing.assert_equal(
expr.apply(self.dataset),
expected.values
)
if not skip_no_index:
np.testing.assert_equal(
expr.apply(self.dataset),
expected.values
)
# keep_index=True
pd.testing.assert_series_equal(
expr.apply(self.dataset, keep_index=True),
Expand All @@ -87,20 +88,22 @@ def check_apply(self, expr, expected, skip_dask=False):
expected_dask = dd.from_pandas(expected, npartitions=2)

# keep_index=False, compute=False
da.assert_eq(
expr.apply(self.dataset_dask, compute=False), expected_dask.values
)
if not skip_no_index:
da.assert_eq(
expr.apply(self.dataset_dask, compute=False), expected_dask.values
)
# keep_index=True, compute=False
dd.assert_eq(
expr.apply(self.dataset_dask, keep_index=True, compute=False),
expected_dask,
check_names=False
)
# keep_index=False, compute=True
np.testing.assert_equal(
expr.apply(self.dataset_dask, compute=True),
expected_dask.values.compute()
)
if not skip_no_index:
np.testing.assert_equal(
expr.apply(self.dataset_dask, compute=True),
expected_dask.values.compute()
)
# keep_index=True, compute=True
pd.testing.assert_series_equal(
expr.apply(self.dataset_dask, keep_index=True, compute=True),
Expand Down Expand Up @@ -226,6 +229,10 @@ def test_log10_transform(self):

# Custom functions

def test_str_astype(self):
# Calling the `str` accessor with no arguments should behave as a cast to
# string: the result is compared against linear_ints.astype(str).
# The dask-backed checks are skipped for this expression.
expr = dim('int').str()
self.check_apply(expr, self.linear_ints.astype(str), skip_dask=True)

def test_norm_transform(self):
expr = dim('int').norm()
self.check_apply(expr, (self.linear_ints-1)/9.)
Expand Down Expand Up @@ -284,6 +291,12 @@ def test_categorize_transform_dict_with_default(self):
# We don't skip dask because results are stable across partitions
self.check_apply(expr, expected)

# Check accessors

def test_str_pandas_accessor(self):
# Accessing `.str` without calling it and then invoking `.lower()` should
# match the same accessor chain applied to the `repeating` series.
# skip_no_index=True disables the keep_index=False comparisons in check_apply.
expr = dim('categories').str.lower()
self.check_apply(expr, self.repeating.str.lower(), skip_no_index=True)

# Numpy functions

def test_digitize(self):
Expand Down Expand Up @@ -332,6 +345,10 @@ def test_multi_operator_expression_repr(self):
self.assertEqual(repr(((dim('float')-2)*3)**2),
"((dim('float')-2)*3)**2")

def test_accessor_repr(self):
# The repr of a chained accessor expression should read back as the
# attribute chain itself, with no call parentheses after the accessors.
self.assertEqual(repr(dim('date').dt.year),
"dim('date').dt.year")

# Applies method

def test_multi_dim_expression_applies(self):
Expand Down
88 changes: 65 additions & 23 deletions holoviews/util/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import operator
import sys

from functools import partial
from types import BuiltinFunctionType, BuiltinMethodType, FunctionType, MethodType

import numpy as np
Expand Down Expand Up @@ -163,7 +162,8 @@ def _python_isin(array, values):

function_types = (
BuiltinFunctionType, BuiltinMethodType, FunctionType,
MethodType, np.ufunc, iloc)
MethodType, np.ufunc, iloc
)


class dim(object):
Expand Down Expand Up @@ -194,7 +194,7 @@ class dim(object):
python_isin: 'isin',
astype: 'astype',
round_: 'round',
iloc: 'iloc'
iloc: 'iloc',
}

_numpy_funcs = {
Expand Down Expand Up @@ -271,10 +271,25 @@ def pipe(cls, func, *args, **kwargs):
def __hash__(self):
return hash(repr(self))

def __call__(self, *args, **kwargs):
# Converts the trailing accessor op of this expression into a call and
# returns a new expression built with self.clone; self.ops is not mutated
# (a new list and a new op dict are created).
# Raises ValueError when there are no ops, when the last op's fn is not a
# string, or when the last op's kwargs lack the 'accessor' key.
if (not self.ops or not isinstance(self.ops[-1]['fn'], basestring) or
'accessor' not in self.ops[-1]['kwargs']):
raise ValueError("Cannot use __call__ method on dim expression "
"which is not an accessor. Ensure that you only "
"call a dim expression, which was created by "
"accessing an attribute that does not exist "
"on an existing dim expression.")
op = self.ops[-1]
if op['fn'] == 'str':
# Calling the 'str' accessor is rewritten as a cast: fn becomes astype
# with args (str,) and empty kwargs. Arguments passed to this call are
# not used in this branch.
new_op = dict(op, fn=astype, args=(str,), kwargs={})
else:
# Any other accessor keeps its fn; the op's stored args and kwargs are
# replaced by the ones supplied to this call.
new_op = dict(op, args=args, kwargs=kwargs)
# Swap only the last op, keeping the dimension and all earlier ops.
return self.clone(self.dimension, self.ops[:-1]+[new_op])

def __getattr__(self, attr):
if attr in self.__dict__:
return self.__dict__[attr]
return partial(self.method, attr)
return dim(self, attr, accessor=True)

@property
def params(self):
Expand Down Expand Up @@ -428,13 +443,14 @@ def norm(self, limits=None):
kwargs = {'min': limits[0], 'max': limits[1]}
return dim(self, norm, **kwargs)

@property
def str(self):
"Casts values to strings."
return self.astype(str)
"Casts values to strings or provides str accessor."
return dim(self, 'str', accessor=True)

# Other methods

def applies(self, dataset):
def applies(self, dataset, strict=False):
"""
Determines whether the dim transform can be applied to the
Dataset, i.e. whether all referenced dimensions can be
Expand All @@ -445,9 +461,10 @@ def applies(self, dataset):
if isinstance(self.dimension, dim):
applies = self.dimension.applies(dataset)
else:
applies = dataset.get_dimension(self.dimension) is not None
lookup = self.dimension if strict else self.dimension.name
applies = dataset.get_dimension(lookup) is not None
if isinstance(dataset, Graph) and not applies:
applies = dataset.nodes.get_dimension(self.dimension) is not None
applies = dataset.nodes.get_dimension(lookup) is not None
for op in self.ops:
args = op.get('args')
if not args:
Expand Down Expand Up @@ -510,7 +527,14 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False,
if 'axis' not in kwargs and not isinstance(fn, np.ufunc):
kwargs['axis'] = None
fn = fn_name
fn_args = [] if isinstance(fn, basestring) else [data]

if isinstance(fn, basestring):
accessor = kwargs.pop('accessor', None)
fn_args = []
else:
accessor = False
fn_args = [data]

for arg in args:
if isinstance(arg, dim):
arg = arg.apply(
Expand Down Expand Up @@ -548,34 +572,44 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False,
elif isinstance(fn, basestring):
method = getattr(data, fn, None)
if method is None:
mtype = 'attribute' if accessor else 'method'
raise AttributeError(
"%r could not be applied to '%r', '%s' method "
"%r could not be applied to '%r', '%s' %s "
"does not exist on %s type."
% (self, dataset, fn, type(data).__name__)
% (self, dataset, fn, mtype, type(data).__name__)
)
try:
data = method(*args, **kwargs)
except Exception as e:
if 'axis' in kwargs:
kwargs.pop('axis')
if accessor:
data = method
else:
try:
data = method(*args, **kwargs)
else:
raise e
except Exception as e:
if 'axis' in kwargs:
kwargs.pop('axis')
data = method(*args, **kwargs)
else:
raise e
else:
data = fn(*args, **kwargs)
return data

def __repr__(self):
op_repr = "'%s'" % self.dimension
accessor = False
for i, o in enumerate(self.ops):
if i == 0:
prev = 'dim({repr}'
elif accessor:
prev = '{repr}'
else:
prev = '({repr}'
fn = o['fn']
ufunc = isinstance(fn, np.ufunc)
args = ', '.join([repr(r) for r in o['args']]) if o['args'] else ''
kwargs = sorted(o['kwargs'].items(), key=operator.itemgetter(0))
kwargs = o['kwargs']
prev_accessor = accessor
accessor = kwargs.pop('accessor', None)
kwargs = sorted(kwargs.items(), key=operator.itemgetter(0))
kwargs = '%s' % ', '.join(['%s=%r' % item for item in kwargs]) if kwargs else ''
if fn in self._binary_funcs:
fn_name = self._binary_funcs[o['fn']]
Expand All @@ -597,7 +631,11 @@ def __repr__(self):
fn_name = self._builtin_funcs[fn]
format_string = '{fn}'+prev
elif isinstance(fn, basestring):
format_string = prev+').{fn}('
if accessor:
sep = '' if op_repr.endswith(')') or prev_accessor else ')'
format_string = prev+sep+'.{fn}'
else:
format_string = prev+').{fn}('
elif fn in self._numpy_funcs:
fn_name = self._numpy_funcs[fn]
format_string = prev+').{fn}('
Expand All @@ -615,14 +653,18 @@ def __repr__(self):
if fn_name in dir(np):
format_string = '.'.join([self._namespaces['numpy'], format_string])
else:
format_string = 'dim(' + prev+', {fn}'
if args:
format_string = prev+', {fn}'
if accessor:
pass
elif args:
if not format_string.endswith('('):
format_string += ', '
format_string += '{args}'
if kwargs:
format_string += ', {kwargs}'
elif kwargs:
if not format_string.endswith('('):
format_string += ', '
format_string += '{kwargs}'
op_repr = format_string.format(fn=fn_name, repr=op_repr,
args=args, kwargs=kwargs)
Expand Down