From 7982048146b0045e1827ac822e06e5d0ebf2d38a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 2 Jun 2018 00:16:58 +0100 Subject: [PATCH 01/14] Implemented concatenation for most interfaces --- holoviews/core/data/__init__.py | 4 +++- holoviews/core/data/array.py | 9 +++++--- holoviews/core/data/dask.py | 12 ++++++++--- holoviews/core/data/dictionary.py | 25 +++++++++++----------- holoviews/core/data/grid.py | 35 +++++++++++++++++++++++++++---- holoviews/core/data/pandas.py | 11 +++++++--- holoviews/core/data/xarray.py | 9 +++----- 7 files changed, 73 insertions(+), 32 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index ec93f50a03..e059cd9386 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -17,12 +17,14 @@ from .multipath import MultiInterface # noqa (API import) from .image import ImageInterface # noqa (API import) +default_datatype = 'dictionary' datatypes = ['dictionary', 'grid'] try: import pandas as pd # noqa (Availability import) from .pandas import PandasInterface - datatypes = ['dataframe', 'dictionary', 'grid', 'array'] + default_datatype = 'dataframe' + datatypes = ['dataframe', 'dictionary', 'grid'] DFColumns = PandasInterface except ImportError: pd = None diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py index 607fcbbe47..ea4da50c32 100644 --- a/holoviews/core/data/array.py +++ b/holoviews/core/data/array.py @@ -1,3 +1,4 @@ +from collections import defaultdict try: import itertools.izip as zip except ImportError: @@ -105,9 +106,11 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim): @classmethod - def concat(cls, dataset_objs): - cast_objs = cls.cast(dataset_objs) - return np.concatenate([col.data for col in cast_objs]) + def concat(cls, datasets, dimensions, vdims): + from . import default_datatype + keys, datasets = zip(*datasets) + datasets = cls.cast(datasets, default_datatype) + return datasets[0].interface.concat(list(zip(keys, datasets)), dimensions) @classmethod diff --git a/holoviews/core/data/dask.py b/holoviews/core/data/dask.py index 59e84332f5..37d1192a81 100644 --- a/holoviews/core/data/dask.py +++ b/holoviews/core/data/dask.py @@ -244,9 +244,15 @@ def add_dimension(cls, columns, dimension, dim_pos, values, vdim): return data @classmethod - def concat(cls, columns_objs): - cast_objs = cls.cast(columns_objs) - return dd.concat([col.data for col in cast_objs]) + def concat(cls, datasets, dimensions, vdims): + dataframes = [] + for key, ds in datasets: + data = ds.data.copy() + for d, k in zip(dimensions, key): + data[d.name] = k + dataframes.append(data) + template = datasets[0][1] + return dd.concat(dataframes) @classmethod def dframe(cls, columns, dimensions): diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py index 4887c4d3d7..594b04c6a1 100644 --- a/holoviews/core/data/dictionary.py +++ b/holoviews/core/data/dictionary.py @@ -1,5 +1,4 @@ -from collections import OrderedDict - +from collections import OrderedDict, defaultdict try: import itertools.izip as zip except ImportError: @@ -185,17 +184,19 @@ def redim(cls, dataset, dimensions): renamed.append((k, v)) return OrderedDict(renamed) + @classmethod - def concat(cls, dataset_objs): - cast_objs = cls.cast(dataset_objs) - cols = set(tuple(c.data.keys()) for c in cast_objs) - if len(cols) != 1: - raise Exception("In order to concatenate, all Dataset objects " - "should have matching set of columns.") - concatenated = OrderedDict() - for column in cols.pop(): - concatenated[column] = np.concatenate([obj[column] for obj in cast_objs]) - return concatenated + def concat(cls, datasets, dimensions, vdims): + columns = defaultdict(list) + for key, ds in datasets: + for k, vals in ds.data.items(): + columns[k].append(vals) + for d, k in zip(dimensions, key): + columns[d.name].append(np.full(len(ds), k)) + + template = datasets[0][1] + dims = dimensions+template.dimensions() + return OrderedDict([(d.name, np.concatenate(columns[d.name])) for d in dims]) @classmethod diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index 523904b2e4..e7495e4c34 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -17,13 +17,15 @@ except ImportError: da = None +def is_dask(array): + return da and isinstance(array, da.Array) from .dictionary import DictInterface from .interface import Interface, DataError from ..dimension import Dimension from ..element import Element from ..dimension import OrderedDict as cyODict -from ..ndmapping import NdMapping, item_check +from ..ndmapping import NdMapping, item_check, sorted_context from .. import util @@ -115,6 +117,31 @@ def init(cls, eltype, data, kdims, vdims): 'actual shape: %s' % (vdim, valid_shape, shape), cls) return data, {'kdims':kdims, 'vdims':vdims}, {} + @classmethod + def concat(cls, datasets, dimensions, vdims): + from . import Dataset + with sorted_context(False): + datasets = NdMapping(datasets, kdims=dimensions) + datasets = datasets.clone([(k, v.data if isinstance(v, Dataset) else v) + for k, v in datasets.data.items()]) + if len(datasets.kdims) > 1: + items = datasets.groupby(datasets.kdims[:-1]).data.items() + return cls.concat([(k, cls.concat(v, v.kdims, vdims=vdims)) for k, v in items], + datasets.kdims[:-1], vdims) + return cls.concat_dim(datasets, datasets.kdims[0], vdims) + + + @classmethod + def concat_dim(cls, datasets, dim, vdims): + values, grids = zip(*datasets.items()) + new_data = {k: v for k, v in grids[0].items() if k not in vdims} + new_data[dim.name] = np.array(values) + for vdim in vdims: + arrays = [grid[vdim.name] for grid in grids] + stack = np.stack if any(is_dask(arr) for arr in arrays) else da.stack + new_data[vdim.name] = stack(arrays, -1) + return new_data + @classmethod def irregular(cls, dataset, dim): @@ -541,9 +568,9 @@ def aggregate(cls, dataset, kdims, function, **kwargs): axes = tuple(dataset.ndims-dataset.get_dimension_index(kdim)-1 for kdim in dataset.kdims if kdim not in kdims) for vdim in dataset.vdims: - data[vdim.name] = np.atleast_1d(function(dataset.data[vdim.name], - axis=axes, **kwargs)) - + values = dataset.data[vdim.name] + atleast_1d = da.atleast_1d if is_dask(values) else np.atleast_1d + data[vdim.name] = atleast_1d(function(values, axis=axes, **kwargs)) return data diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py index ba417fc630..85ce66a3d6 100644 --- a/holoviews/core/data/pandas.py +++ b/holoviews/core/data/pandas.py @@ -169,9 +169,14 @@ def range(cls, columns, dimension): @classmethod - def concat(cls, columns_objs): - cast_objs = cls.cast(columns_objs) - return pd.concat([col.data for col in cast_objs]) + def concat(cls, datasets, dimensions, vdims): + dataframes = [] + for key, ds in datasets: + data = ds.data.copy() + for d, k in zip(dimensions, key): + data[d.name] = k + dataframes.append(data) + return pd.concat(dataframes) @classmethod diff --git a/holoviews/core/data/xarray.py b/holoviews/core/data/xarray.py index 0c3bf6d7d3..f5b0946b68 100644 --- a/holoviews/core/data/xarray.py +++ b/holoviews/core/data/xarray.py @@ -360,13 +360,10 @@ def ndloc(cls, dataset, indices): else: return dataset.data.isel(**isel) - @classmethod - def concat(cls, dataset_objs): - #cast_objs = cls.cast(dataset_objs) - # Reimplement concat to automatically add dimensions - # once multi-dimensional concat has been added to xarray. - return xr.concat([col.data for col in dataset_objs], dim='concat_dim') + def concat_dim(cls, datasets, dim, vdims): + return xr.concat([ds.assign_coords(**{dim.name: c}) for c, ds in datasets.items()], + dim=dim.name) @classmethod def redim(cls, dataset, dimensions): From 37bfdbb0f4059e70727a7060a1bcec1138587fe2 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 2 Jun 2018 00:17:55 +0100 Subject: [PATCH 02/14] Defined default datatype --- holoviews/core/data/__init__.py | 2 +- holoviews/element/util.py | 3 ++- holoviews/operation/element.py | 11 ++++------- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index e059cd9386..273edf8567 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -293,7 +293,7 @@ def add_dimension(self, dimension, dim_pos, dim_val, vdim=False, **kwargs): dimensions = dict(kdims=dims) if issubclass(self.interface, ArrayInterface) and np.asarray(dim_val).dtype != self.data.dtype: - element = self.clone(datatype=['pandas', 'dictionary']) + element = self.clone(datatype=[default_datatype]) data = element.interface.add_dimension(element, dimension, dim_pos, dim_val, vdim) else: data = self.interface.add_dimension(self, dimension, dim_pos, dim_val, vdim) diff --git a/holoviews/element/util.py b/holoviews/element/util.py index 0765601d2b..a682011f6a 100644 --- a/holoviews/element/util.py +++ b/holoviews/element/util.py @@ -5,6 +5,7 @@ from ..core import Dataset, OrderedDict from ..core.boundingregion import BoundingBox +from ..core.data import default_datatype from ..core.operation import Operation from ..core.sheetcoords import Slice from ..core.util import (is_nan, sort_topologically, one_to_one, @@ -186,7 +187,7 @@ def _aggregate_dataset(self, obj, xcoords, ycoords): values = np.empty(nsamples) values[:] = np.NaN data[vdim.name] = values - dtype = 'dataframe' if pd else 'dictionary' + dtype = default_datatype dense_data = Dataset(data, kdims=obj.kdims, vdims=obj.vdims, datatype=[dtype]) concat_data = obj.interface.concatenate([dense_data, obj], datatype=[dtype]) reindexed = concat_data.reindex([xdim, ydim], vdims) diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index 5cb505700e..281b4851db 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -11,7 +11,7 @@ from ..core import (Operation, NdOverlay, Overlay, GridMatrix, HoloMap, Dataset, Element, Collator, Dimension) -from ..core.data import ArrayInterface, DictInterface +from ..core.data import ArrayInterface, DictInterface, default_datatype from ..core.util import (group_sanitizer, label_sanitizer, pd, basestring, datetime_types, isfinite, dt_to_int) from ..element.chart import Histogram, Scatter @@ -792,10 +792,7 @@ def _process(self, p, element, ranges={}): # Creates a unified Dataset.data attribute # to draw the data from if isinstance(element.data, np.ndarray): - if 'dataframe' in Dataset.datatype: - el_data = element.table('dataframe') - else: - el_data = element.table('dictionary') + el_data = element.table(default_datatype) else: el_data = element.data @@ -818,7 +815,7 @@ def _process(self, p, element, ranges={}): if p.diagonal_type is not None: if p.diagonal_type._auto_indexable_1d: el = p.diagonal_type(el_data, kdims=[d1], vdims=[d2], - datatype=['dataframe', 'dictionary']) + datatype=[default_datatype]) else: values = element.dimension_values(d1) el = p.diagonal_type(values, kdims=[d1]) @@ -830,7 +827,7 @@ def _process(self, p, element, ranges={}): else: kdims, vdims = ([d1, d2], []) if len(p.chart_type.kdims) == 2 else (d1, d2) el = p.chart_type(el_data, kdims=kdims, vdims=vdims, - datatype=['dataframe', 'dictionary']) + datatype=[default_datatype]) data[(d1.name, d2.name)] = el return data From bf5ce0023c22ac5f86cc76b77163acb773e2c04d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 2 Jun 2018 00:18:30 +0100 Subject: [PATCH 03/14] Stop wrapping NdMapping values in tuples in groupby --- holoviews/core/util.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/holoviews/core/util.py b/holoviews/core/util.py index aee1a66375..414d3f033d 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -1431,7 +1431,6 @@ def unpack_group(group, getter): if hasattr(obj, 'kdims'): yield (key, obj) else: - obj = tuple(v) yield (wrap_tuple(key), obj) @@ -1532,7 +1531,7 @@ def groupby_python(self_or_cls, ndmapping, dimensions, container_type, selects = get_unique_keys(ndmapping, dimensions) selects = group_select(list(selects)) groups = [(k, group_type((v.reindex(idims) if hasattr(v, 'kdims') - else [((), (v,))]), **kwargs)) + else [((), v)]), **kwargs)) for k, v in iterative_select(ndmapping, dim_names, selects)] return container_type(groups, kdims=dimensions) From 0ef96853e946888ddbb6e984f22e822e8cb079bb Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 2 Jun 2018 00:18:50 +0100 Subject: [PATCH 04/14] Simplified and improved casting --- holoviews/core/data/interface.py | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index 9cef76a333..5103afc4a0 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -107,30 +107,21 @@ class Interface(param.Parameterized): def register(cls, interface): cls.interfaces[interface.datatype] = interface - @classmethod - def cast(cls, dataset, datatype=None, cast_type=None): + def cast(cls, datasets, datatype=None, cast_type=None): """ Given a list of Dataset objects, cast them to the specified datatype (by default the format matching the current interface) with the given cast_type (if specified). """ - if len({type(c) for c in dataset}) > 1 and cast_type is None: - raise Exception("Please supply the common cast type") - - if datatype is None: - datatype = cls.datatype - - unchanged = all({c.interface==cls for c in dataset}) - if unchanged and cast_type is None: - return dataset - elif unchanged: - return [cast_type(co, **dict(util.get_param_values(co))) - for co in dataset] - - return [co.clone(co.columns(), datatype=[datatype], new_type=cast_type) - for co in dataset] - + datatype = datatype or cls.datatype + interfaces = list(util.unique_iterator((d.interface for d in datasets))) + cast = [] + for ds in datasets: + if cast_type is not None or ds.interface.datatype in datatype: + ds = ds.clone(ds, datatype=[datatype], new_type=cast_type) + cast.append(ds) + return cast @classmethod def error(cls): @@ -167,7 +158,7 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None): if data.interface.datatype in datatype and data.interface.datatype in eltype.datatype: data = data.data - elif data.interface.gridded: + elif data.interface.gridded and any(cls.interfaces[dt].gridded for dt in datatype): gridded = OrderedDict([(kd.name, data.dimension_values(kd.name, expanded=False)) for kd in data.kdims]) for vd in data.vdims: From 6e172812295b5b8a404e5f08bc9d10a167d3df9a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 2 Jun 2018 02:54:19 +0100 Subject: [PATCH 05/14] Added concatenate utility --- holoviews/core/data/__init__.py | 35 ++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 273edf8567..5cac8cb410 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -61,11 +61,44 @@ from ..dimension import Dimension, process_dimensions from ..element import Element -from ..ndmapping import OrderedDict +from ..ndmapping import OrderedDict, NdMapping from ..spaces import HoloMap, DynamicMap from .. import util +def concat(datasets, datatype=None): + """ + Concatenates multiple datasets wrapped in an NdMapping type + along all of its dimensions. Before concatenation all datasets + are cast to the same datatype. For columnar data concatenation + adds the columns for the dimensions being concatenated along + and then concatenates all the old and new columns. For gridded + data a new axis is created for each dimension being concatenated + along and then hierarchically concatenates along each dimension. + + Signature + --------- + + datasets: NdMapping of Datasets defining dimensions to concatenate on + datatype: Datatype to cast data to before concatenation + + Returns: Dataset + """ + if isinstance(datasets, NdMapping): + dimensions = datasets.kdims + datasets = datasets.data + if isinstance(datasets, (dict, OrderedDict)): + datasets = datasets.items() + keys, datasets = zip(*datasets) + template = datasets[0] + datatype = datatype or template.interface.datatype + datasets = template.interface.cast(datasets, datatype) + template = datasets[0] + data = list(zip(keys, datasets)) + concat_data = template.interface.concat(data, dimensions, vdims=template.vdims) + return template.clone(concat_data, kdims=dimensions+template.kdims, new_type=Dataset) + + class DataConversion(object): """ DataConversion is a very simple container object which can be From 901bb17a9c94bd9569d53a9838ed6bcffdf62413 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 2 Jun 2018 02:55:06 +0100 Subject: [PATCH 06/14] Fixed HoloMap.collapse for gridded data --- holoviews/core/spaces.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/holoviews/core/spaces.py b/holoviews/core/spaces.py index c85a213ecb..307cbc2be8 100644 --- a/holoviews/core/spaces.py +++ b/holoviews/core/spaces.py @@ -311,6 +311,7 @@ def collapse(self, dimensions=None, function=None, spreadfn=None, **kwargs): on the HoloMap. Homogeneous Elements may be collapsed by supplying a function, inhomogeneous elements are merged. """ + from .data import concat if not dimensions: dimensions = self.kdims if not isinstance(dimensions, list): dimensions = [dimensions] @@ -324,16 +325,18 @@ def collapse(self, dimensions=None, function=None, spreadfn=None, **kwargs): collapsed = groups.clone(shared_data=False) for key, group in groups.items(): - group_data = [el.data for el in group] - args = (group_data, function, group.last.kdims) if hasattr(group.last, 'interface'): - col_data = group.type(group.table().aggregate(group.last.kdims, function, spreadfn, **kwargs)) - + group_data = concat(group) + if function: + agg = group_data.aggregate(group.last.kdims, function, spreadfn, **kwargs) + group_data = group.type(agg) else: + group_data = [el.data for el in group] + args = (group_data, function, group.last.kdims) data = group.type.collapse_data(*args, **kwargs) - col_data = group.last.clone(data) - collapsed[key] = col_data - return collapsed if self.ndims > 1 else collapsed.last + group_data = group.last.clone(data) + collapsed[key] = group_data + return collapsed if self.ndims-len(dimensions) else collapsed.last def sample(self, samples=[], bounds=None, **sample_values): From 3ece3d9fb1d1fb6101e976a4f77bf1218b839599 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 2 Jun 2018 13:57:25 +0100 Subject: [PATCH 07/14] Further improvements to grid concatenation --- holoviews/core/data/__init__.py | 7 +++++++ holoviews/core/data/dictionary.py | 5 ++++- holoviews/core/data/grid.py | 3 ++- holoviews/core/data/interface.py | 2 +- holoviews/core/data/iris.py | 14 ++++++++++++++ 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 5cac8cb410..7494bacae9 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -92,6 +92,13 @@ def concat(datasets, datatype=None): keys, datasets = zip(*datasets) template = datasets[0] datatype = datatype or template.interface.datatype + + # Handle non-general datatypes by casting to general type + if datatype == 'array': + datatype = default_datatype + elif datatype == 'image': + datatype = 'grid' + datasets = template.interface.cast(datasets, datatype) template = datasets[0] data = list(zip(keys, datasets)) diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py index 594b04c6a1..7df96938b7 100644 --- a/holoviews/core/data/dictionary.py +++ b/holoviews/core/data/dictionary.py @@ -138,7 +138,10 @@ def unpack_scalar(cls, dataset, data): key = list(data.keys())[0] if len(data[key]) == 1 and key in dataset.vdims: - return data[key][0] + scalar = data[key][0] + return scalar.compute() if hasattr(scalar, 'compute') else scalar + return data + @classmethod def isscalar(cls, dataset, dim): diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index e7495e4c34..73863eba35 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -150,7 +150,8 @@ def irregular(cls, dataset, dim): @classmethod def isscalar(cls, dataset, dim): - return np.unique(cls.values(dataset, dim, expanded=False)) == 1 + values = cls.values(dataset, dim, expanded=False) + return values.shape in ((), (1,)) or len(np.unique(values)) == 1 @classmethod diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index 5103afc4a0..d39b3fd84a 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -118,7 +118,7 @@ def cast(cls, datasets, datatype=None, cast_type=None): interfaces = list(util.unique_iterator((d.interface for d in datasets))) cast = [] for ds in datasets: - if cast_type is not None or ds.interface.datatype in datatype: + if cast_type is not None or ds.interface.datatype != datatype: ds = ds.clone(ds, datatype=[datatype], new_type=cast_type) cast.append(ds) return cast diff --git a/holoviews/core/data/iris.py b/holoviews/core/data/iris.py index e6025f4111..a316b5ae46 100644 --- a/holoviews/core/data/iris.py +++ b/holoviews/core/data/iris.py @@ -4,6 +4,8 @@ from itertools import product import iris +from iris.coords import AuxCoord +from iris.cube import CubeList from iris.util import guess_coord_axis import numpy as np @@ -230,6 +232,18 @@ def groupby(cls, dataset, dims, container_type=HoloMap, group_type=None, **kwarg else: return container_type(data) + @classmethod + def concat_dim(cls, datasets, dim, vdims): + """ + Concatenates datasets along one dimension + """ + cubes = [] + for c, cube in datasets.items(): + cube = cube.copy() + cube.add_aux_coord(AuxCoord([c], var_name=dim.name)) + cubes.append(cube) + return CubeList(cubes).merge()[0] + @classmethod def range(cls, dataset, dimension): From 95b6844a8b0cca9b399f330f121c7c32d3deed17 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 2 Jun 2018 14:42:11 +0100 Subject: [PATCH 08/14] Updated old usages of Interface.concatenate --- holoviews/core/data/__init__.py | 21 +-------------- holoviews/core/data/array.py | 8 ------ holoviews/core/data/interface.py | 45 +++++++++++++++++++------------- holoviews/core/ndmapping.py | 11 ++------ holoviews/element/util.py | 2 +- 5 files changed, 31 insertions(+), 56 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 7494bacae9..4c20d4cd06 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -84,26 +84,7 @@ def concat(datasets, datatype=None): Returns: Dataset """ - if isinstance(datasets, NdMapping): - dimensions = datasets.kdims - datasets = datasets.data - if isinstance(datasets, (dict, OrderedDict)): - datasets = datasets.items() - keys, datasets = zip(*datasets) - template = datasets[0] - datatype = datatype or template.interface.datatype - - # Handle non-general datatypes by casting to general type - if datatype == 'array': - datatype = default_datatype - elif datatype == 'image': - datatype = 'grid' - - datasets = template.interface.cast(datasets, datatype) - template = datasets[0] - data = list(zip(keys, datasets)) - concat_data = template.interface.concat(data, dimensions, vdims=template.vdims) - return template.clone(concat_data, kdims=dimensions+template.kdims, new_type=Dataset) + return Interface.concatenate(datasets, datatype) class DataConversion(object): diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py index ea4da50c32..7cec53550c 100644 --- a/holoviews/core/data/array.py +++ b/holoviews/core/data/array.py @@ -105,14 +105,6 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim): return np.insert(data, dim_pos, values, axis=1) - @classmethod - def concat(cls, datasets, dimensions, vdims): - from . import default_datatype - keys, datasets = zip(*datasets) - datasets = cls.cast(datasets, default_datatype) - return datasets[0].interface.concat(list(zip(keys, datasets)), dimensions) - - @classmethod def sort(cls, dataset, by=[], reverse=False): data = dataset.data diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index d39b3fd84a..c5c84fc381 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -4,7 +4,7 @@ import numpy as np from ..element import Element -from ..ndmapping import OrderedDict +from ..ndmapping import OrderedDict, NdMapping from .. import util @@ -298,25 +298,34 @@ def range(cls, dataset, dimension): return column[0], column[-1] @classmethod - def concatenate(cls, dataset, datatype=None): + def concatenate(cls, datasets, datatype=None): """ - Utility function to concatenate a list of Column objects, - returning a new Dataset object. Note that this is unlike the - .concat method which only concatenates the data. + Utility function to concatenate an NdMapping of Dataset objects. """ - if len(set(type(c) for c in dataset)) != 1: - raise Exception("All inputs must be same type in order to concatenate") - - interfaces = set(c.interface for c in dataset) - if len(interfaces)!=1 and datatype is None: - raise Exception("Please specify the concatenated datatype") - elif len(interfaces)!=1: - interface = cls.interfaces[datatype] - else: - interface = interfaces.pop() - - concat_data = interface.concat(dataset) - return dataset[0].clone(concat_data) + from . import Dataset + if isinstance(datasets, NdMapping): + dimensions = datasets.kdims + datasets = datasets.data + if isinstance(datasets, (dict, OrderedDict)): + datasets = datasets.items() + keys, datasets = zip(*datasets) + elif isinstance(datasets, list) and not any(isinstance(v, tuple) for v in datasets): + keys = [()]*len(datasets) + dimensions = [] + template = datasets[0] + datatype = datatype or template.interface.datatype + + # Handle non-general datatypes by casting to general type + if datatype == 'array': + datatype = default_datatype + elif datatype == 'image': + datatype = 'grid' + + datasets = template.interface.cast(datasets, datatype) + template = datasets[0] + data = list(zip(keys, datasets)) if keys else datasets + concat_data = template.interface.concat(data, dimensions, vdims=template.vdims) + return template.clone(concat_data, kdims=dimensions+template.kdims, new_type=Dataset) @classmethod def reduce(cls, dataset, reduce_dims, function, **kwargs): diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index e0e9ac7b84..92e304296b 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -429,15 +429,8 @@ def info(self): def table(self, datatype=None, **kwargs): "Creates a table from the stored keys and data." - if datatype is None: - datatype = ['dataframe' if pd else 'dictionary'] - - tables = [] - for key, value in self.data.items(): - value = value.table(datatype=datatype, **kwargs) - for idx, (dim, val) in enumerate(zip(self.kdims, key)): - value = value.add_dimension(dim, idx, val) - tables.append(value) + new_data = [(key, value.table(datatype=datatype, **kwargs)) for key, value in self.data.items()] + tables = self.clone(tables, shared_data=False) return value.interface.concatenate(tables) diff --git a/holoviews/element/util.py b/holoviews/element/util.py index a682011f6a..bccdb96833 100644 --- a/holoviews/element/util.py +++ b/holoviews/element/util.py @@ -189,7 +189,7 @@ def _aggregate_dataset(self, obj, xcoords, ycoords): data[vdim.name] = values dtype = default_datatype dense_data = Dataset(data, kdims=obj.kdims, vdims=obj.vdims, datatype=[dtype]) - concat_data = obj.interface.concatenate([dense_data, obj], datatype=[dtype]) + concat_data = obj.interface.concatenate([dense_data, obj], datatype=dtype) reindexed = concat_data.reindex([xdim, ydim], vdims) if not reindexed: agg = reindexed From 7dd80c5dd84571d03bfdb7693b0ffd68c6b59822 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 2 Jun 2018 14:44:04 +0100 Subject: [PATCH 09/14] Fixed flakes --- holoviews/core/data/__init__.py | 2 +- holoviews/core/data/array.py | 1 - holoviews/core/data/dask.py | 1 - holoviews/core/data/grid.py | 1 + holoviews/core/data/interface.py | 3 +-- holoviews/core/ndmapping.py | 10 ++++++---- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 4c20d4cd06..0bcdad3fe4 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -61,7 +61,7 @@ from ..dimension import Dimension, process_dimensions from ..element import Element -from ..ndmapping import OrderedDict, NdMapping +from ..ndmapping import OrderedDict from ..spaces import HoloMap, DynamicMap from .. import util diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py index 7cec53550c..a24e24eaed 100644 --- a/holoviews/core/data/array.py +++ b/holoviews/core/data/array.py @@ -1,4 +1,3 @@ -from collections import defaultdict try: import itertools.izip as zip except ImportError: diff --git a/holoviews/core/data/dask.py b/holoviews/core/data/dask.py index 37d1192a81..eaa836fdaa 100644 --- a/holoviews/core/data/dask.py +++ b/holoviews/core/data/dask.py @@ -251,7 +251,6 @@ def concat(cls, datasets, dimensions, vdims): for d, k in zip(dimensions, key): data[d.name] = k dataframes.append(data) - template = datasets[0][1] return dd.concat(dataframes) @classmethod diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index 73863eba35..1780d2adc5 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -117,6 +117,7 @@ def init(cls, eltype, data, kdims, vdims): 'actual shape: %s' % (vdim, valid_shape, shape), cls) return data, {'kdims':kdims, 'vdims':vdims}, {} + @classmethod def concat(cls, datasets, dimensions, vdims): from . import Dataset diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index c5c84fc381..d1f3728c99 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -115,7 +115,6 @@ def cast(cls, datasets, datatype=None, cast_type=None): with the given cast_type (if specified). """ datatype = datatype or cls.datatype - interfaces = list(util.unique_iterator((d.interface for d in datasets))) cast = [] for ds in datasets: if cast_type is not None or ds.interface.datatype != datatype: @@ -302,7 +301,7 @@ def concatenate(cls, datasets, datatype=None): """ Utility function to concatenate an NdMapping of Dataset objects. """ - from . import Dataset + from . import Dataset, default_datatype if isinstance(datasets, NdMapping): dimensions = datasets.kdims datasets = datasets.data diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index 92e304296b..99a2b9e45e 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -13,7 +13,7 @@ from . import util from .dimension import OrderedDict, Dimension, Dimensioned, ViewableElement from .util import (unique_iterator, sanitize_identifier, dimension_sort, - basestring, wrap_tuple, process_ellipses, get_ndmapping_label, pd) + basestring, wrap_tuple, process_ellipses, get_ndmapping_label) class item_check(object): @@ -429,9 +429,11 @@ def info(self): def table(self, datatype=None, **kwargs): "Creates a table from the stored keys and data." - new_data = [(key, value.table(datatype=datatype, **kwargs)) for key, value in self.data.items()] - tables = self.clone(tables, shared_data=False) - return value.interface.concatenate(tables) + from .data.interface import Interface + new_data = [(key, value.table(datatype=datatype, **kwargs)) + for key, value in self.data.items()] + tables = self.clone(new_data) + return Interface.concatenate(tables) def dframe(self): From ffad7550f54c886767d40082aff5bf55343a2d79 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 20 Jun 2018 12:46:53 +0100 Subject: [PATCH 10/14] Fixes for iris concatenation --- holoviews/core/data/iris.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/holoviews/core/data/iris.py b/holoviews/core/data/iris.py index a316b5ae46..9b8108cd18 100644 --- a/holoviews/core/data/iris.py +++ b/holoviews/core/data/iris.py @@ -4,8 +4,9 @@ from itertools import product import iris -from iris.coords import AuxCoord +from iris.coords import DimCoord from iris.cube import CubeList +from iris.experimental.equalise_cubes import equalise_attributes from iris.util import guess_coord_axis import numpy as np @@ -240,9 +241,11 @@ def concat_dim(cls, datasets, dim, vdims): cubes = [] for c, cube in datasets.items(): cube = cube.copy() - cube.add_aux_coord(AuxCoord([c], var_name=dim.name)) + cube.add_aux_coord(DimCoord([c], var_name=dim.name)) cubes.append(cube) - return CubeList(cubes).merge()[0] + cubes = CubeList(cubes) + equalise_attributes(cubes) + return cubes.merge_cube() @classmethod @@ -275,7 +278,7 @@ def length(cls, dataset): """ Returns the total number of samples in the dataset. """ - return np.product([len(d.points) for d in dataset.data.coords()]) + return np.product([len(d.points) for d in dataset.data.coords(dim_coords=True)]) @classmethod From a96e32c24eb6735eb84fab0bdd057f3677e72ee3 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 20 Jun 2018 12:47:04 +0100 Subject: [PATCH 11/14] Added validation for GridInterface.concat --- holoviews/core/data/grid.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index 1780d2adc5..d2c4968504 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -139,6 +139,12 @@ def concat_dim(cls, datasets, dim, vdims): new_data[dim.name] = np.array(values) for vdim in vdims: arrays = [grid[vdim.name] for grid in grids] + shapes = set(arr.shape for arr in arrays) + if len(shapes) > 1: + raise DataError('When concatenating gridded data the shape ' + 'of arrays must match. %s found that arrays ' + 'along the %s dimension do not match.' % + (cls.__name__, vdim.name)) stack = np.stack if any(is_dask(arr) for arr in arrays) else da.stack new_data[vdim.name] = stack(arrays, -1) return new_data From 67bd62a276ed7e0c00689aa43d1a069674c487d6 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 20 Jun 2018 12:47:22 +0100 Subject: [PATCH 12/14] Improved tests for grid concatenation --- tests/core/data/base.py | 22 ++++++++++++++++++++++ tests/core/data/testirisinterface.py | 13 ++++++++++++- tests/core/data/testxarrayinterface.py | 15 ++++++++++++++- 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/tests/core/data/base.py b/tests/core/data/base.py index 7c2608cdd2..20c0a84ac2 100644 --- a/tests/core/data/base.py +++ b/tests/core/data/base.py @@ -9,6 +9,8 @@ import numpy as np from holoviews import Dataset, HoloMap, Dimension +from holoviews.core.data import concat +from holoviews.core.data.interface import DataError from holoviews.element import Scatter, Curve from holoviews.element.comparison import ComparisonTestCase @@ -1081,3 +1083,23 @@ def test_aggregate_2d_with_spreadfn(self): agg = ds.aggregate('x', np.mean, np.std) example = Dataset((range(5), array.mean(axis=0), array.std(axis=0)), 'x', ['z', 'z_std']) self.assertEqual(agg, example) + + def test_concat_grid_3d(self): + array = np.random.rand(4, 5, 3, 2) + orig = Dataset((range(2), range(3), range(5), range(4), array), ['A', 'B', 'x', 'y'], 'z') + hmap = HoloMap({(i, j): self.element((range(5), range(4), array[:, :, j, i]), ['x', 'y'], 'z') + for i in range(2) for j in range(3)}, ['A', 'B']) + ds = concat(hmap) + self.assertEqual(ds, orig) + + def test_concat_grid_3d_shape_mismatch(self): + ds1 = Dataset(([0, 1], [1, 2, 3], np.random.rand(3, 2)), ['x', 'y'], 'z') + ds2 = Dataset(([0, 1, 2], [1, 2], np.random.rand(2, 3)), ['x', 'y'], 'z') + hmap = HoloMap({1: ds1, 2: ds2}) + with self.assertRaises(DataError): + concat(hmap) + + def test_grid_3d_groupby_concat_roundtrip(self): + array = np.random.rand(4, 5, 3, 2) + orig = Dataset((range(2), range(3), range(5), range(4), array), ['A', 'B', 'x', 'y'], 'z') + self.assertEqual(concat(orig.groupby(['A', 'B'])), orig) diff --git a/tests/core/data/testirisinterface.py b/tests/core/data/testirisinterface.py index ccd9785933..6735c2f8c7 100644 --- a/tests/core/data/testirisinterface.py +++ b/tests/core/data/testirisinterface.py @@ -6,11 +6,13 @@ try: import iris from iris.tests.stock import lat_lon_cube + from iris.exceptions import MergeError except ImportError: raise SkipTest("Could not import iris, skipping IrisInterface tests.") -from holoviews.core.data import Dataset +from holoviews.core.data import Dataset, concat from holoviews.core.data.iris import coord_to_dimension +from holoviews.core.spaces import HoloMap from holoviews.element import Image from .testimageinterface import Image_ImageInterfaceTests @@ -30,6 +32,15 @@ def init_data(self): self.cube = lat_lon_cube() self.epsilon = 0.01 + def test_concat_grid_3d_shape_mismatch(self): + arr1 = np.random.rand(3, 2) + arr2 = np.random.rand(2, 3) + ds1 = Dataset(([0, 1], [1, 2, 3], arr1), ['x', 'y'], 'z') + ds2 = Dataset(([0, 1, 2], [1, 2], arr2), ['x', 'y'], 'z') + hmap = HoloMap({1: ds1, 2: ds2}) + with self.assertRaises(MergeError): + concat(hmap) + def test_dataset_array_init_hm(self): "Tests support for arrays (homogeneous)" raise SkipTest("Not supported") diff --git a/tests/core/data/testxarrayinterface.py b/tests/core/data/testxarrayinterface.py index 408a21c6de..de8fc3e028 100644 --- a/tests/core/data/testxarrayinterface.py +++ b/tests/core/data/testxarrayinterface.py @@ -9,8 +9,9 @@ except: raise SkipTest("Could not import xarray, skipping XArrayInterface tests.") -from holoviews.core.data import Dataset +from holoviews.core.data import Dataset, concat from holoviews.core.dimension import Dimension +from holoviews.core.spaces import HoloMap from holoviews.element import Image, RGB, HSV from .testimageinterface import ( @@ -109,6 +110,18 @@ def test_xarray_coord_ordering(self): ds = Dataset(dataset) self.assertEqual(ds.kdims, ['b', 'c', 'a']) + def test_concat_grid_3d_shape_mismatch(self): + arr1 = np.random.rand(3, 2) + arr2 = np.random.rand(2, 3) + ds1 = Dataset(([0, 1], [1, 2, 3], arr1), ['x', 'y'], 'z') + ds2 = Dataset(([0, 1, 2], [1, 2], arr2), ['x', 'y'], 'z') + hmap = HoloMap({1: ds1, 2: ds2}) + arr = np.full((3, 3, 2), np.NaN) + arr[:, :2, 0] = arr1 + arr[:2, :, 1] = arr2 + ds = Dataset(([1, 2], [0, 1, 2], [1, 2, 3], arr), ['Default', 'x', 'y'], 'z') + self.assertEqual(concat(hmap), ds) + def test_dataset_array_init_hm(self): "Tests support for arrays (homogeneous)" raise SkipTest("Not supported") From fc15f1aae8fba3bb32cae9774ef54f7369b524c2 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 20 Jun 2018 13:14:58 +0100 Subject: [PATCH 13/14] Fixed bug in NdMapping.table --- holoviews/core/data/interface.py | 5 +++-- holoviews/core/ndmapping.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index d1f3728c99..d5f999ba84 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -297,11 +297,12 @@ def range(cls, dataset, dimension): return column[0], column[-1] @classmethod - def concatenate(cls, datasets, datatype=None): + def concatenate(cls, datasets, datatype=None, new_type=None): """ Utility function to concatenate an NdMapping of Dataset objects. """ from . import Dataset, default_datatype + new_type = new_type or Dataset if isinstance(datasets, NdMapping): dimensions = datasets.kdims datasets = datasets.data @@ -324,7 +325,7 @@ def concatenate(cls, datasets, datatype=None): template = datasets[0] data = list(zip(keys, datasets)) if keys else datasets concat_data = template.interface.concat(data, dimensions, vdims=template.vdims) - return template.clone(concat_data, kdims=dimensions+template.kdims, new_type=Dataset) + return template.clone(concat_data, kdims=dimensions+template.kdims, new_type=new_type) @classmethod def reduce(cls, dataset, reduce_dims, function, **kwargs): diff --git a/holoviews/core/ndmapping.py b/holoviews/core/ndmapping.py index 99a2b9e45e..a0d6519548 100644 --- a/holoviews/core/ndmapping.py +++ b/holoviews/core/ndmapping.py @@ -430,10 +430,11 @@ def info(self): def table(self, datatype=None, **kwargs): "Creates a table from the stored keys and data." from .data.interface import Interface + from ..element.tabular import Table new_data = [(key, value.table(datatype=datatype, **kwargs)) for key, value in self.data.items()] tables = self.clone(new_data) - return Interface.concatenate(tables) + return Interface.concatenate(tables, new_type=Table) def dframe(self): From 6356310d3e4afaf5e89fa1c53374bdb64539cb18 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 20 Jun 2018 18:46:05 +0100 Subject: [PATCH 14/14] Addressed review comments --- holoviews/core/data/__init__.py | 16 +++++++++------- holoviews/core/data/interface.py | 21 ++++++++++++++------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 0bcdad3fe4..30b5944aa9 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -68,13 +68,15 @@ def concat(datasets, datatype=None): """ - Concatenates multiple datasets wrapped in an NdMapping type - along all of its dimensions. Before concatenation all datasets - are cast to the same datatype. For columnar data concatenation - adds the columns for the dimensions being concatenated along - and then concatenates all the old and new columns. For gridded - data a new axis is created for each dimension being concatenated - along and then hierarchically concatenates along each dimension. + Concatenates multiple datasets wrapped in an NdMapping type along + all of its dimensions. Before concatenation all datasets are cast + to the same datatype, which may be explicitly defined or + implicitly derived from the first datatype that is + encountered. For columnar data concatenation adds the columns for + the dimensions being concatenated along and then concatenates all + the old and new columns. For gridded data a new axis is created + for each dimension being concatenated along and then + hierarchically concatenates along each dimension. Signature --------- diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index d5f999ba84..cfc6985067 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -305,13 +305,14 @@ def concatenate(cls, datasets, datatype=None, new_type=None): new_type = new_type or Dataset if isinstance(datasets, NdMapping): dimensions = datasets.kdims - datasets = datasets.data - if isinstance(datasets, (dict, OrderedDict)): - datasets = datasets.items() - keys, datasets = zip(*datasets) - elif isinstance(datasets, list) and not any(isinstance(v, tuple) for v in datasets): - keys = [()]*len(datasets) - dimensions = [] + keys, datasets = zip(*datasets.data.items()) + elif isinstance(datasets, list) and all(not isinstance(v, tuple) for v in datasets): + # Allow concatenating list of datasets (by declaring no dimensions and keys) + dimensions, keys = [], [()]*len(datasets) + else: + raise DataError('Concatenation only supported for NdMappings ' + 'and lists of Datasets, found %s.' % type(datasets).__name__) + template = datasets[0] datatype = datatype or template.interface.datatype @@ -321,6 +322,12 @@ def concatenate(cls, datasets, datatype=None, new_type=None): elif datatype == 'image': datatype = 'grid' + if len(datasets) > 1 and not dimensions and cls.interfaces[datatype].gridded: + raise DataError('Datasets with %s datatype cannot be concatenated ' + 'without defining the dimensions to concatenate along. ' + 'Ensure you pass in a NdMapping (e.g. a HoloMap) ' + 'of Dataset types, not a list.' % datatype) + datasets = template.interface.cast(datasets, datatype) template = datasets[0] data = list(zip(keys, datasets)) if keys else datasets