Allow using dim expressions as accessors (#4311)

holoviz · Mar 23, 2020 · b7fb3ec · b7fb3ec
1 parent fbbf409
commit b7fb3ec
Show file tree

Hide file tree

Showing 2 changed files with 94 additions and 35 deletions.
diff --git a/holoviews/tests/util/testtransform.py b/holoviews/tests/util/testtransform.py
@@ -42,7 +42,7 @@ def setUp(self):
 
     # Assertion helpers
 
-    def check_apply(self, expr, expected, skip_dask=False):
+    def check_apply(self, expr, expected, skip_dask=False, skip_no_index=False):
         if np.isscalar(expected):
             # Pandas input
             self.assertEqual(
@@ -69,10 +69,11 @@ def check_apply(self, expr, expected, skip_dask=False):
 
         # Check using dataset backed by pandas DataFrame
         # keep_index=False
-        np.testing.assert_equal(
-            expr.apply(self.dataset),
-            expected.values
-        )
+        if not skip_no_index:
+            np.testing.assert_equal(
+                expr.apply(self.dataset),
+                expected.values
+            )
         # keep_index=True
         pd.testing.assert_series_equal(
             expr.apply(self.dataset, keep_index=True),
@@ -87,20 +88,22 @@ def check_apply(self, expr, expected, skip_dask=False):
         expected_dask = dd.from_pandas(expected, npartitions=2)
 
         # keep_index=False, compute=False
-        da.assert_eq(
-            expr.apply(self.dataset_dask, compute=False), expected_dask.values
-        )
+        if not skip_no_index:
+            da.assert_eq(
+                expr.apply(self.dataset_dask, compute=False), expected_dask.values
+            )
         # keep_index=True, compute=False
         dd.assert_eq(
             expr.apply(self.dataset_dask, keep_index=True, compute=False),
             expected_dask,
             check_names=False
         )
         # keep_index=False, compute=True
-        np.testing.assert_equal(
-            expr.apply(self.dataset_dask, compute=True),
-            expected_dask.values.compute()
-        )
+        if not skip_no_index:
+            np.testing.assert_equal(
+                expr.apply(self.dataset_dask, compute=True),
+                expected_dask.values.compute()
+            )
         # keep_index=True, compute=True
         pd.testing.assert_series_equal(
             expr.apply(self.dataset_dask, keep_index=True, compute=True),
@@ -226,6 +229,10 @@ def test_log10_transform(self):
 
     # Custom functions
 
+    def test_str_astype(self):
+        expr = dim('int').str()
+        self.check_apply(expr, self.linear_ints.astype(str), skip_dask=True)
+
     def test_norm_transform(self):
         expr = dim('int').norm()
         self.check_apply(expr, (self.linear_ints-1)/9.)
@@ -284,6 +291,12 @@ def test_categorize_transform_dict_with_default(self):
         # We don't skip dask because results are stable across partitions
         self.check_apply(expr, expected)
 
+    # Check accessors
+
+    def test_str_pandas_accessor(self):
+        expr = dim('categories').str.lower()
+        self.check_apply(expr, self.repeating.str.lower(), skip_no_index=True)
+
     # Numpy functions
 
     def test_digitize(self):
@@ -332,6 +345,10 @@ def test_multi_operator_expression_repr(self):
         self.assertEqual(repr(((dim('float')-2)*3)**2),
                          "((dim('float')-2)*3)**2")
 
+    def test_accessor_repr(self):
+        self.assertEqual(repr(dim('date').dt.year),
+                         "dim('date').dt.year")
+
     # Applies method
 
     def test_multi_dim_expression_applies(self):

diff --git a/holoviews/util/transform.py b/holoviews/util/transform.py
@@ -3,7 +3,6 @@
 import operator
 import sys
 
-from functools import partial
 from types import BuiltinFunctionType, BuiltinMethodType, FunctionType, MethodType
 
 import numpy as np
@@ -163,7 +162,8 @@ def _python_isin(array, values):
 
 function_types = (
     BuiltinFunctionType, BuiltinMethodType, FunctionType,
-    MethodType, np.ufunc, iloc)
+    MethodType, np.ufunc, iloc
+)
 
 
 class dim(object):
@@ -194,7 +194,7 @@ class dim(object):
         python_isin: 'isin',
         astype: 'astype',
         round_: 'round',
-        iloc: 'iloc'
+        iloc: 'iloc',
     }
 
     _numpy_funcs = {
@@ -271,10 +271,25 @@ def pipe(cls, func, *args, **kwargs):
     def __hash__(self):
         return hash(repr(self))
 
+    def __call__(self, *args, **kwargs):
+        if (not self.ops or not isinstance(self.ops[-1]['fn'], basestring) or
+            'accessor' not in self.ops[-1]['kwargs']):
+            raise ValueError("Cannot use __call__ method on dim expression "
+                             "which is not an accessor. Ensure that you only "
+                             "call a dim expression, which was created by "
+                             "accessing an attribute that does not exist "
+                             "on an existing dim expression.")
+        op = self.ops[-1]
+        if op['fn'] == 'str':
+            new_op = dict(op, fn=astype, args=(str,), kwargs={})
+        else:
+            new_op = dict(op, args=args, kwargs=kwargs)
+        return self.clone(self.dimension, self.ops[:-1]+[new_op])
+
     def __getattr__(self, attr):
         if attr in self.__dict__:
             return self.__dict__[attr]
-        return partial(self.method, attr)
+        return dim(self, attr, accessor=True)
 
     @property
     def params(self):
@@ -428,13 +443,14 @@ def norm(self, limits=None):
             kwargs = {'min': limits[0], 'max': limits[1]}
         return dim(self, norm, **kwargs)
 
+    @property
     def str(self):
-        "Casts values to strings."
-        return self.astype(str)
+        "Casts values to strings or provides str accessor."
+        return dim(self, 'str', accessor=True)
 
     # Other methods
 
-    def applies(self, dataset):
+    def applies(self, dataset, strict=False):
         """
         Determines whether the dim transform can be applied to the
         Dataset, i.e. whether all referenced dimensions can be
@@ -445,9 +461,10 @@ def applies(self, dataset):
         if isinstance(self.dimension, dim):
             applies = self.dimension.applies(dataset)
         else:
-            applies = dataset.get_dimension(self.dimension) is not None
+            lookup = self.dimension if strict else self.dimension.name
+            applies = dataset.get_dimension(lookup) is not None
             if isinstance(dataset, Graph) and not applies:
-                applies = dataset.nodes.get_dimension(self.dimension) is not None
+                applies = dataset.nodes.get_dimension(lookup) is not None
         for op in self.ops:
             args = op.get('args')
             if not args:
@@ -510,7 +527,14 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False,
                 if 'axis' not in kwargs and not isinstance(fn, np.ufunc):
                     kwargs['axis'] = None
                 fn = fn_name
-            fn_args = [] if isinstance(fn, basestring) else [data]
+
+            if isinstance(fn, basestring):
+                accessor = kwargs.pop('accessor', None)
+                fn_args = []
+            else:
+                accessor = False
+                fn_args = [data]
+
             for arg in args:
                 if isinstance(arg, dim):
                     arg = arg.apply(
@@ -548,34 +572,44 @@ def apply(self, dataset, flat=False, expanded=None, ranges={}, all_values=False,
             elif isinstance(fn, basestring):
                 method = getattr(data, fn, None)
                 if method is None:
+                    mtype = 'attribute' if accessor else 'method'
                     raise AttributeError(
-                        "%r could not be applied to '%r', '%s' method "
+                        "%r could not be applied to '%r', '%s' %s "
                         "does not exist on %s type."
-                        % (self, dataset, fn, type(data).__name__)
+                        % (self, dataset, fn, mtype, type(data).__name__)
                     )
-                try:
-                    data = method(*args, **kwargs)
-                except Exception as e:
-                    if 'axis' in kwargs:
-                        kwargs.pop('axis')
+                if accessor:
+                    data = method
+                else:
+                    try:
                         data = method(*args, **kwargs)
-                    else:
-                        raise e
+                    except Exception as e:
+                        if 'axis' in kwargs:
+                            kwargs.pop('axis')
+                            data = method(*args, **kwargs)
+                        else:
+                            raise e
             else:
                 data = fn(*args, **kwargs)
         return data
 
     def __repr__(self):
         op_repr = "'%s'" % self.dimension
+        accessor = False
         for i, o in enumerate(self.ops):
             if i == 0:
                 prev = 'dim({repr}'
+            elif accessor:
+                prev = '{repr}'
             else:
                 prev = '({repr}'
             fn = o['fn']
             ufunc = isinstance(fn, np.ufunc)
             args = ', '.join([repr(r) for r in o['args']]) if o['args'] else ''
-            kwargs = sorted(o['kwargs'].items(), key=operator.itemgetter(0))
+            kwargs = o['kwargs']
+            prev_accessor = accessor
+            accessor = kwargs.pop('accessor', None)
+            kwargs = sorted(kwargs.items(), key=operator.itemgetter(0))
             kwargs = '%s' % ', '.join(['%s=%r' % item for item in kwargs]) if kwargs else ''
             if fn in self._binary_funcs:
                 fn_name = self._binary_funcs[o['fn']]
@@ -597,7 +631,11 @@ def __repr__(self):
                     fn_name = self._builtin_funcs[fn]
                     format_string = '{fn}'+prev
                 elif isinstance(fn, basestring):
-                    format_string = prev+').{fn}('
+                    if accessor:
+                        sep = '' if op_repr.endswith(')') or prev_accessor else ')'
+                        format_string = prev+sep+'.{fn}'
+                    else:
+                        format_string = prev+').{fn}('
                 elif fn in self._numpy_funcs:
                     fn_name = self._numpy_funcs[fn]
                     format_string = prev+').{fn}('
@@ -615,14 +653,18 @@ def __repr__(self):
                     if fn_name in dir(np):
                         format_string = '.'.join([self._namespaces['numpy'], format_string])
                 else:
-                    format_string = 'dim(' + prev+', {fn}'
-                if args:
+                    format_string = prev+', {fn}'
+                if accessor:
+                    pass
+                elif args:
                     if not format_string.endswith('('):
                         format_string += ', '
                     format_string += '{args}'
                     if kwargs:
                         format_string += ', {kwargs}'
                 elif kwargs:
+                    if not format_string.endswith('('):
+                        format_string += ', '
                     format_string += '{kwargs}'
             op_repr = format_string.format(fn=fn_name, repr=op_repr,
                                            args=args, kwargs=kwargs)