From b76677c912d31a95a29b2f71244366bd83353168 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 22 Mar 2020 20:17:58 +0100 Subject: [PATCH 1/7] Allow using dim expressions as accessors --- holoviews/util/transform.py | 49 +++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/holoviews/util/transform.py b/holoviews/util/transform.py index 1a48b2966c..fbebdd6981 100644 --- a/holoviews/util/transform.py +++ b/holoviews/util/transform.py @@ -163,7 +163,8 @@ def _python_isin(array, values): function_types = ( BuiltinFunctionType, BuiltinMethodType, FunctionType, - MethodType, np.ufunc, iloc) + MethodType, np.ufunc, iloc +) class dim(object): @@ -194,7 +195,7 @@ class dim(object): python_isin: 'isin', astype: 'astype', round_: 'round', - iloc: 'iloc' + iloc: 'iloc', } _numpy_funcs = { @@ -271,10 +272,21 @@ def pipe(cls, func, *args, **kwargs): def __hash__(self): return hash(repr(self)) + def __call__(self, *args, **kwargs): + if (not self.opts or not isinstance(self.ops[-1]['fn'], basestring) or + 'accessor' not in self.opts[-1]['kwargs']): + raise ValueError("Cannot use __call__ method on dim expression " + "which is not an accessor. Ensure that you only " + "call a dim expression, which was created by " + "accessing an attribute that does not exist " + "on an existing dim expression.") + new_op = dict(self.ops[-1], args=args, kwargs=kwargs) + return self.clone(self.dimension, self.ops[:-1]+[new_op]) + def __getattr__(self, attr): if attr in self.__dict__: return self.__dict__[attr] - return partial(self.method, attr) + return dim(self, attr, accessor=True) @property def params(self): @@ -510,7 +522,14 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False, if 'axis' not in kwargs and not isinstance(fn, np.ufunc): kwargs['axis'] = None fn = fn_name - fn_args = [] if isinstance(fn, basestring) else [data] + + if isinstance(fn, basestring): + accessor = kwargs.pop('accessor', None) + fn_args = [data] + else: + accessor = False + fn_args = [data] + for arg in args: if isinstance(arg, dim): arg = arg.apply( @@ -548,19 +567,23 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False, elif isinstance(fn, basestring): method = getattr(data, fn, None) if method is None: + mtype = 'attribute' if accessor else 'method' raise AttributeError( - "%r could not be applied to '%r', '%s' method " + "%r could not be applied to '%r', '%s' %s" "does not exist on %s type." - % (self, dataset, fn, type(data).__name__) + % (self, dataset, fn, mtype, type(data).__name__) ) - try: - data = method(*args, **kwargs) - except Exception as e: - if 'axis' in kwargs: - kwargs.pop('axis') + if accessor: + data = method + else: + try: data = method(*args, **kwargs) - else: - raise e + except Exception as e: + if 'axis' in kwargs: + kwargs.pop('axis') + data = method(*args, **kwargs) + else: + raise e else: data = fn(*args, **kwargs) return data From 26e6c4b468d80c643ff34d1eaba3ad63a2aa45a0 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 22 Mar 2020 20:32:34 +0100 Subject: [PATCH 2/7] Improved repr --- holoviews/util/transform.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/holoviews/util/transform.py b/holoviews/util/transform.py index fbebdd6981..d42ac23390 100644 --- a/holoviews/util/transform.py +++ b/holoviews/util/transform.py @@ -590,15 +590,21 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False, def __repr__(self): op_repr = "'%s'" % self.dimension + accessor = False for i, o in enumerate(self.ops): if i == 0: prev = 'dim({repr}' + elif accessor: + prev = '{repr}' else: prev = '({repr}' fn = o['fn'] ufunc = isinstance(fn, np.ufunc) args = ', '.join([repr(r) for r in o['args']]) if o['args'] else '' - kwargs = sorted(o['kwargs'].items(), key=operator.itemgetter(0)) + kwargs = o['kwargs'] + prev_accessor = accessor + accessor = kwargs.pop('accessor', None) + kwargs = sorted(kwargs.items(), key=operator.itemgetter(0)) kwargs = '%s' % ', '.join(['%s=%r' % item for item in kwargs]) if kwargs else '' if fn in self._binary_funcs: fn_name = self._binary_funcs[o['fn']] @@ -620,7 +626,11 @@ def __repr__(self): fn_name = self._builtin_funcs[fn] format_string = '{fn}'+prev elif isinstance(fn, basestring): - format_string = prev+').{fn}(' + if accessor: + sep = '' if op_repr.endswith(')') or prev_accessor else ')' + format_string = prev+sep+'.{fn}' + else: + format_string = prev+').{fn}(' elif fn in self._numpy_funcs: fn_name = self._numpy_funcs[fn] format_string = prev+').{fn}(' @@ -639,7 +649,9 @@ def __repr__(self): format_string = '.'.join([self._namespaces['numpy'], format_string]) else: format_string = 'dim(' + prev+', {fn}' - if args: + if accessor: + pass + elif args: if not format_string.endswith('('): format_string += ', ' format_string += '{args}' From f3a4e45cdafa89c551cd2ada0e2c3116baf22f61 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sun, 22 Mar 2020 21:03:37 +0100 Subject: [PATCH 3/7] Fixed flake --- holoviews/util/transform.py | 1 - 1 file changed, 1 deletion(-) diff --git a/holoviews/util/transform.py b/holoviews/util/transform.py index d42ac23390..40d7b73c23 100644 --- a/holoviews/util/transform.py +++ b/holoviews/util/transform.py @@ -3,7 +3,6 @@ import operator import sys -from functools import partial from types import BuiltinFunctionType, BuiltinMethodType, FunctionType, MethodType import numpy as np From b0101470abf0e425dee76d6c9a0b83414032d045 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 23 Mar 2020 03:15:37 +0100 Subject: [PATCH 4/7] Fixed bugs --- holoviews/util/transform.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/holoviews/util/transform.py b/holoviews/util/transform.py index 40d7b73c23..bc47c7c738 100644 --- a/holoviews/util/transform.py +++ b/holoviews/util/transform.py @@ -272,8 +272,8 @@ def __hash__(self): return hash(repr(self)) def __call__(self, *args, **kwargs): - if (not self.opts or not isinstance(self.ops[-1]['fn'], basestring) or - 'accessor' not in self.opts[-1]['kwargs']): + if (not self.ops or not isinstance(self.ops[-1]['fn'], basestring) or + 'accessor' not in self.ops[-1]['kwargs']): raise ValueError("Cannot use __call__ method on dim expression " "which is not an accessor. Ensure that you only " "call a dim expression, which was created by " @@ -647,7 +647,7 @@ def __repr__(self): if fn_name in dir(np): format_string = '.'.join([self._namespaces['numpy'], format_string]) else: - format_string = 'dim(' + prev+', {fn}' + format_string = prev+', {fn}' if accessor: pass elif args: @@ -657,6 +657,8 @@ def __repr__(self): if kwargs: format_string += ', {kwargs}' elif kwargs: + if not format_string.endswith('('): + format_string += ', ' format_string += '{kwargs}' op_repr = format_string.format(fn=fn_name, repr=op_repr, args=args, kwargs=kwargs) From 474bead1acd2bc79341cd7fef15f155c5f67b3fb Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 23 Mar 2020 13:06:06 +0100 Subject: [PATCH 5/7] Make default dim.applies check less strict --- holoviews/util/transform.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/holoviews/util/transform.py b/holoviews/util/transform.py index bc47c7c738..8114498e8a 100644 --- a/holoviews/util/transform.py +++ b/holoviews/util/transform.py @@ -445,7 +445,7 @@ def str(self): # Other methods - def applies(self, dataset): + def applies(self, dataset, strict=False): """ Determines whether the dim transform can be applied to the Dataset, i.e. whether all referenced dimensions can be @@ -456,9 +456,10 @@ def applies(self, dataset): if isinstance(self.dimension, dim): applies = self.dimension.applies(dataset) else: - applies = dataset.get_dimension(self.dimension) is not None + lookup = self.dimension if strict else self.dimension.name + applies = dataset.get_dimension(lookup) is not None if isinstance(dataset, Graph) and not applies: - applies = dataset.nodes.get_dimension(self.dimension) is not None + applies = dataset.nodes.get_dimension(lookup) is not None for op in self.ops: args = op.get('args') if not args: From 13d5039af623f501d4db4d228dc47f56d174c3db Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 23 Mar 2020 13:26:06 +0100 Subject: [PATCH 6/7] Support str accessor --- holoviews/util/transform.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/holoviews/util/transform.py b/holoviews/util/transform.py index 8114498e8a..a49475f116 100644 --- a/holoviews/util/transform.py +++ b/holoviews/util/transform.py @@ -279,7 +279,11 @@ def __call__(self, *args, **kwargs): "call a dim expression, which was created by " "accessing an attribute that does not exist " "on an existing dim expression.") - new_op = dict(self.ops[-1], args=args, kwargs=kwargs) + op = self.ops[-1] + if op['fn'] == 'str': + new_op = dict(op, fn=astype, args=(str,), kwargs={}) + else: + new_op = dict(op, args=args, kwargs=kwargs) return self.clone(self.dimension, self.ops[:-1]+[new_op]) def __getattr__(self, attr): @@ -439,9 +443,10 @@ def norm(self, limits=None): kwargs = {'min': limits[0], 'max': limits[1]} return dim(self, norm, **kwargs) + @property def str(self): - "Casts values to strings." - return self.astype(str) + "Casts values to strings or provides str accessor." + return dim(self, 'str', accessor=True) # Other methods @@ -525,7 +530,7 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False, if isinstance(fn, basestring): accessor = kwargs.pop('accessor', None) - fn_args = [data] + fn_args = [] else: accessor = False fn_args = [data] @@ -569,7 +574,7 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False, if method is None: mtype = 'attribute' if accessor else 'method' raise AttributeError( - "%r could not be applied to '%r', '%s' %s" + "%r could not be applied to '%r', '%s' %s " "does not exist on %s type." % (self, dataset, fn, mtype, type(data).__name__) ) From 34d1c2a015b6d787f831fcb347424e7f2e0de4ff Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 23 Mar 2020 13:26:14 +0100 Subject: [PATCH 7/7] Add tests --- holoviews/tests/util/testtransform.py | 41 +++++++++++++++++++-------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/holoviews/tests/util/testtransform.py b/holoviews/tests/util/testtransform.py index 1fc8fe1986..7d120b9467 100644 --- a/holoviews/tests/util/testtransform.py +++ b/holoviews/tests/util/testtransform.py @@ -42,7 +42,7 @@ def setUp(self): # Assertion helpers - def check_apply(self, expr, expected, skip_dask=False): + def check_apply(self, expr, expected, skip_dask=False, skip_no_index=False): if np.isscalar(expected): # Pandas input self.assertEqual( @@ -69,10 +69,11 @@ def check_apply(self, expr, expected, skip_dask=False): # Check using dataset backed by pandas DataFrame # keep_index=False - np.testing.assert_equal( - expr.apply(self.dataset), - expected.values - ) + if not skip_no_index: + np.testing.assert_equal( + expr.apply(self.dataset), + expected.values + ) # keep_index=True pd.testing.assert_series_equal( expr.apply(self.dataset, keep_index=True), @@ -87,9 +88,10 @@ def check_apply(self, expr, expected, skip_dask=False): expected_dask = dd.from_pandas(expected, npartitions=2) # keep_index=False, compute=False - da.assert_eq( - expr.apply(self.dataset_dask, compute=False), expected_dask.values - ) + if not skip_no_index: + da.assert_eq( + expr.apply(self.dataset_dask, compute=False), expected_dask.values + ) # keep_index=True, compute=False dd.assert_eq( expr.apply(self.dataset_dask, keep_index=True, compute=False), @@ -97,10 +99,11 @@ def check_apply(self, expr, expected, skip_dask=False): check_names=False ) # keep_index=False, compute=True - np.testing.assert_equal( - expr.apply(self.dataset_dask, compute=True), - expected_dask.values.compute() - ) + if not skip_no_index: + np.testing.assert_equal( + expr.apply(self.dataset_dask, compute=True), + expected_dask.values.compute() + ) # keep_index=True, compute=True pd.testing.assert_series_equal( expr.apply(self.dataset_dask, keep_index=True, compute=True), @@ -226,6 +229,10 @@ def test_log10_transform(self): # Custom functions + def test_str_astype(self): + expr = dim('int').str() + self.check_apply(expr, self.linear_ints.astype(str), skip_dask=True) + def test_norm_transform(self): expr = dim('int').norm() self.check_apply(expr, (self.linear_ints-1)/9.) @@ -284,6 +291,12 @@ def test_categorize_transform_dict_with_default(self): # We don't skip dask because results are stable across partitions self.check_apply(expr, expected) + # Check accesors + + def test_str_pandas_accessor(self): + expr = dim('categories').str.lower() + self.check_apply(expr, self.repeating.str.lower(), skip_no_index=True) + # Numpy functions def test_digitize(self): @@ -332,6 +345,10 @@ def test_multi_operator_expression_repr(self): self.assertEqual(repr(((dim('float')-2)*3)**2), "((dim('float')-2)*3)**2") + def test_accessor_repr(self): + self.assertEqual(repr(dim('date').dt.year), + "dim('date').dt.year") + # Applies method def test_multi_dim_expression_applies(self):