Add support for packing grid value dimensions #3983

Merged (9 commits) on Sep 24, 2019
holoviews/core/data/grid.py (90 changes: 77 additions & 13 deletions)
@@ -57,8 +57,15 @@ def init(cls, eltype, data, kdims, vdims):
 
         ndims = len(kdims)
         dimensions = [dimension_name(d) for d in kdims+vdims]
+        vdim_tuple = tuple(dimension_name(vd) for vd in vdims)
         if isinstance(data, tuple):
-            data = {d: v for d, v in zip(dimensions, data)}
+            if (len(data) != len(dimensions) and len(data) == (ndims+1) and
+                len(data[-1].shape) == (ndims+1)):
+                value_array = data[-1]
+                data = {d: v for d, v in zip(dimensions, data[:-1])}
+                data[vdim_tuple] = value_array
+            else:
+                data = {d: v for d, v in zip(dimensions, data)}
         elif isinstance(data, list) and data == []:
             data = OrderedDict([(d, []) for d in dimensions])
         elif not any(isinstance(data, tuple(t for t in interface.types if t is not None))
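For illustration, the tuple format this branch accepts can be exercised as follows; a minimal sketch assuming this PR is installed, with made-up coordinates and a made-up 5x10x2 value block:

import numpy as np
import holoviews as hv

xs = np.linspace(0, 1, 10)
ys = np.linspace(0, 1, 5)
# One packed (5, 10, 2) block instead of two separate (5, 10) arrays;
# the trailing axis indexes the value dimensions 'a' and 'b'.
values = np.random.rand(5, 10, 2)
ds = hv.Dataset((xs, ys, values), kdims=['x', 'y'], vdims=['a', 'b'],
                datatype=['grid'])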
@@ -78,22 +85,37 @@ def init(cls, eltype, data, kdims, vdims):
             raise TypeError('GridInterface must be instantiated as a '
                             'dictionary or tuple')
 
-        for dim in kdims+vdims:
+        validate_dims = list(kdims)
+        if vdim_tuple in data:
+            if not isinstance(data[vdim_tuple], get_array_types()):
+                data[vdim_tuple] = np.array(data[vdim_tuple])
+        else:
+            validate_dims += vdims
+
+        for dim in validate_dims:
             name = dimension_name(dim)
             if name not in data:
                 raise ValueError("Values for dimension %s not found" % dim)
             if not isinstance(data[name], get_array_types()):
                 data[name] = np.array(data[name])
 
         kdim_names = [dimension_name(d) for d in kdims]
-        vdim_names = [dimension_name(d) for d in vdims]
+        if vdim_tuple in data:
+            vdim_names = [vdim_tuple]
+        else:
+            vdim_names = [dimension_name(d) for d in vdims]
 
         expected = tuple([len(data[kd]) for kd in kdim_names])
         irregular_shape = data[kdim_names[0]].shape if kdim_names else ()
         valid_shape = irregular_shape if len(irregular_shape) > 1 else expected[::-1]
         shapes = tuple([data[kd].shape for kd in kdim_names])
         for vdim in vdim_names:
             shape = data[vdim].shape
             error = DataError if len(shape) > 1 else ValueError
+            if vdim_tuple in data:
+                if shape[-1] != len(vdims):
+                    raise error('The shape of the value array does not match the number of value dimensions.')
+                shape = shape[:-1]
             if (not expected and shape == (1,)) or (len(set((shape,)+shapes)) == 1 and len(shape) > 1):
                 # If empty or an irregular mesh
                 pass
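Continuing the sketch above, a value block whose trailing axis disagrees with the declared vdims now fails fast rather than silently misaligning bands:

bad = np.random.rand(5, 10, 3)  # three bands, but only two vdims declared
hv.Dataset((xs, ys, bad), kdims=['x', 'y'], vdims=['a', 'b'], datatype=['grid'])
# -> DataError: The shape of the value array does not match the number of value dimensions.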
@@ -154,7 +176,13 @@ def isscalar(cls, dataset, dim):
 
     @classmethod
     def validate(cls, dataset, vdims=True):
-        Interface.validate(dataset, vdims)
+        dims = 'all' if vdims else 'key'
+        not_found = [d for d in dataset.dimensions(dims, label='name')
+                     if d not in dataset.data]
+        if not_found and tuple(not_found) not in dataset.data:
+            raise DataError("Supplied data does not contain specified "
+                            "dimensions, the following dimensions were "
+                            "not found: %s" % repr(not_found), cls)
 
 
     @classmethod
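The packed block is keyed by the vdim-name tuple rather than by individual names, which is what the tuple membership test above accounts for; for the packed sketch this looks roughly like:

list(ds.data)  # ['x', 'y', ('a', 'b')]: no per-vdim keys, one tuple key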
@@ -166,9 +194,33 @@ def dimension_type(cls, dataset, dim):
         return arr.dtype.type
 
 
+    @classmethod
+    def packed(cls, dataset):
+        vdim_tuple = tuple(vd.name for vd in dataset.vdims)
+        return vdim_tuple if vdim_tuple in dataset.data else False
+
+
+    @classmethod
+    def dtype(cls, dataset, dimension):
+        name = dataset.get_dimension(dimension, strict=True).name
+        vdim_tuple = cls.packed(dataset)
+        if vdim_tuple and name in vdim_tuple:
+            data = dataset.data[vdim_tuple][..., vdim_tuple.index(name)]
+        else:
+            data = dataset.data[name]
+        if util.isscalar(data):
+            return np.array([data]).dtype
+        else:
+            return data.dtype
+
+
     @classmethod
     def shape(cls, dataset, gridded=False):
-        shape = dataset.data[dataset.vdims[0].name].shape
+        vdim_tuple = cls.packed(dataset)
+        if vdim_tuple:
+            shape = dataset.data[vdim_tuple].shape[:-1]
+        else:
+            shape = dataset.data[dataset.vdims[0].name].shape
         if gridded:
             return shape
         else:
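A quick sketch of the new accessors against the packed dataset from the earlier example (interface methods are classmethods that take the dataset):

iface = ds.interface               # resolves to the grid interface here
iface.packed(ds)                   # ('a', 'b'): the vdim tuple, or False if unpacked
iface.dtype(ds, 'a')               # dtype of the 'a' band sliced from the block
iface.shape(ds, gridded=True)      # (5, 10): the trailing vdim axis is dropped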
@@ -343,7 +395,11 @@ def values(
                ):
         dim = dataset.get_dimension(dim, strict=True)
         if dim in dataset.vdims or dataset.data[dim.name].ndim > 1:
-            data = dataset.data[dim.name]
+            vdim_tuple = cls.packed(dataset)
+            if vdim_tuple:
+                data = dataset.data[vdim_tuple][..., dataset.vdims.index(dim)]
+            else:
+                data = dataset.data[dim.name]
             data = cls.canonicalize(dataset, data)
             da = dask_array_module()
             if compute and da and isinstance(data, da.Array):
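Callers still see per-dimension arrays; for the packed sketch, e.g.:

a = ds.dimension_values('a', flat=False)  # band 0 sliced out of the packed block
a.shape                                   # (5, 10) after canonicalization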
@@ -582,13 +638,21 @@ def aggregate(cls, dataset, kdims, function, **kwargs):
                      for kdim in dataset.kdims if kdim not in kdims)
         da = dask_array_module()
         dropped = []
-        for vdim in dataset.vdims:
-            values = dataset.data[vdim.name]
-            atleast_1d = da.atleast_1d if is_dask(values) else np.atleast_1d
-            try:
-                data[vdim.name] = atleast_1d(function(values, axis=axes, **kwargs))
-            except TypeError:
-                dropped.append(vdim)
+        vdim_tuple = cls.packed(dataset)
+        if vdim_tuple:
+            values = dataset.data[vdim_tuple]
+            if axes:
+                data[vdim_tuple] = function(values, axis=axes, **kwargs)
+            else:
+                data[vdim_tuple] = values
+        else:
+            for vdim in dataset.vdims:
+                values = dataset.data[vdim.name]
+                atleast_1d = da.atleast_1d if is_dask(values) else np.atleast_1d
+                try:
+                    data[vdim.name] = atleast_1d(function(values, axis=axes, **kwargs))
+                except TypeError:
+                    dropped.append(vdim)
         return data, dropped
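Aggregation therefore touches the packed block once instead of once per vdim; for example, reducing over x applies the function to all bands in a single call (sketch, continuing from above):

reduced = ds.reduce(['x'], np.mean)  # np.mean hits the whole (5, 10, 2) block at once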


holoviews/core/data/interface.py (2 changes: 1 addition & 1 deletion)
@@ -269,7 +269,7 @@ def expanded(cls, arrays):
 
     @classmethod
    def isscalar(cls, dataset, dim):
-        return cls.values(dataset, dim, expanded=False) == 1
+        return len(cls.values(dataset, dim, expanded=False)) == 1
 
 
     @classmethod
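The old comparison triggered NumPy broadcasting, so it never answered the length question; a minimal demonstration:

import numpy as np

vals = np.array([3.14])
vals == 1        # array([False]): an elementwise comparison, not a length test
len(vals) == 1   # True: what isscalar is actually meant to check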
holoviews/core/data/xarray.py (114 changes: 87 additions & 27 deletions)
@@ -33,18 +33,32 @@ def applies(cls, obj):
     @classmethod
     def dimension_type(cls, dataset, dim):
         name = dataset.get_dimension(dim, strict=True).name
+        if cls.packed(dataset) and name in dataset.vdims:
+            return dataset.data.dtype.type
         return dataset.data[name].dtype.type
 
 
     @classmethod
     def dtype(cls, dataset, dim):
         name = dataset.get_dimension(dim, strict=True).name
+        if cls.packed(dataset) and name in dataset.vdims:
+            return dataset.data.dtype
         return dataset.data[name].dtype
 
+    @classmethod
+    def packed(cls, dataset):
+        import xarray as xr
+        return isinstance(dataset.data, xr.DataArray)
+
     @classmethod
     def shape(cls, dataset, gridded=False):
-        array = dataset.data[dataset.vdims[0].name]
+        if cls.packed(dataset):
+            shape = dataset.data.shape[:-1]
+            if gridded:
+                return shape
+            else:
+                return (np.product(shape, dtype=np.intp), len(dataset.dimensions()))
+        else:
+            array = dataset.data[dataset.vdims[0].name]
         if not any(cls.irregular(dataset, kd) for kd in dataset.kdims):
             names = [kd.name for kd in dataset.kdims
                      if kd.name in array.dims][::-1]
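On the xarray side, packing is signalled purely by container type: the dataset holds a bare DataArray whose trailing axis enumerates the bands. A sketch of what the check sees, with made-up coordinates:

import numpy as np
import xarray as xr

da = xr.DataArray(np.random.rand(5, 10, 2),
                  coords={'y': np.arange(5), 'x': np.arange(10), 'band': [0, 1]},
                  dims=['y', 'x', 'band'])
isinstance(da, xr.DataArray)  # True, so the interface treats it as packed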
@@ -81,8 +95,13 @@ def retrieve_unit_and_label(dim):
             spec = (dim.name, dim.label)
             return dim.clone(spec, unit=unit)
 
+        packed = False
         if isinstance(data, xr.DataArray):
-            if vdims:
+            kdim_len = len(kdim_param.default) if kdims is None else len(kdims)
+            vdim_len = len(vdim_param.default) if vdims is None else len(vdims)
+            if vdim_len > 1 and kdim_len == len(data.dims)-1 and data.shape[-1] == vdim_len:
+                packed = True
+            elif vdims:
                 vdim = vdims[0]
             elif data.name:
                 vdim = Dimension(data.name)
@@ -104,10 +123,11 @@ def retrieve_unit_and_label(dim):
                                 "dimension. Give the DataArray a name or "
                                 "supply an explicit vdim." % eltype.__name__,
                                 cls)
-            vdims = [vdim]
-            data = data.to_dataset(name=vdim.name)
+            if not packed:
+                vdims = [vdim]
+                data = data.to_dataset(name=vdim.name)
 
-        if not isinstance(data, xr.Dataset):
+        if not isinstance(data, (xr.Dataset, xr.DataArray)):
             if kdims is None:
                 kdims = kdim_param.default
             if vdims is None:
@@ -116,10 +136,18 @@ def retrieve_unit_and_label(dim):
             vdims = [asdim(vd) for vd in vdims]
         if isinstance(data, np.ndarray) and data.ndim == 2 and data.shape[1] == len(kdims+vdims):
             data = tuple(data)
+
+        ndims = len(kdims)
         if isinstance(data, tuple):
-            data = {d.name: vals for d, vals in zip(kdims + vdims, data)}
+            dimensions = [d.name for d in kdims+vdims]
+            if (len(data) != len(dimensions) and len(data) == (ndims+1) and
+                len(data[-1].shape) == (ndims+1)):
+                value_array = data[-1]
+                data = {d: v for d, v in zip(dimensions, data[:-1])}
+                packed = True
+            else:
+                data = {d: v for d, v in zip(dimensions, data)}
         elif isinstance(data, list) and data == []:
-            ndims = len(kdims)
             dimensions = [d.name for d in kdims + vdims]
             data = {d: np.array([]) for d in dimensions[:ndims]}
             data.update({d: np.empty((0,) * ndims) for d in dimensions[ndims:]})
@@ -138,13 +166,18 @@ def retrieve_unit_and_label(dim):
                     coord = coord_vals
                 coords[kd.name] = coord
             xr_kwargs = {'dims': dims if max(coord_dims) > 1 else list(coords)[::-1]}
-            arrays = {}
-            for vdim in vdims:
-                arr = data[vdim.name]
-                if not isinstance(arr, xr.DataArray):
-                    arr = xr.DataArray(arr, coords=coords, **xr_kwargs)
-                arrays[vdim.name] = arr
-            data = xr.Dataset(arrays)
+            if packed:
+                xr_kwargs['dims'] = list(coords)[::-1] + ['band']
+                coords['band'] = list(range(len(vdims)))
+                data = xr.DataArray(value_array, coords=coords, **xr_kwargs)
+            else:
+                arrays = {}
+                for vdim in vdims:
+                    arr = data[vdim.name]
+                    if not isinstance(arr, xr.DataArray):
+                        arr = xr.DataArray(arr, coords=coords, **xr_kwargs)
+                    arrays[vdim.name] = arr
+                data = xr.Dataset(arrays)
         else:
             if not data.coords:
                 data = data.assign_coords(**{k: range(v) for k, v in data.dims.items()})
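End to end, the packed tuple from the earlier grid sketch round-trips through this constructor as a single DataArray with a synthetic 'band' axis (sketch, assuming this PR and the xs/ys/values arrays defined above):

ds_xr = hv.Dataset((xs, ys, values), kdims=['x', 'y'], vdims=['a', 'b'],
                   datatype=['xarray'])
type(ds_xr.data)   # xarray.DataArray rather than a Dataset
ds_xr.data.dims    # ('y', 'x', 'band'); 'band' enumerates the two vdims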
@@ -156,7 +189,9 @@ def retrieve_unit_and_label(dim):
             kdims = [name for name in data.indexes.keys()
                      if isinstance(data[name].data, np.ndarray)]
             kdims = sorted(kdims, key=lambda x: (xrcoords.index(x) if x in xrcoords else float('inf'), x))
-            if set(xrdims) != set(kdims):
+            if packed:
+                kdims = kdims[:-1]
+            elif set(xrdims) != set(kdims):
                 virtual_dims = [xd for xd in xrdims if xd not in kdims]
                 for c in data.coords:
                     if c not in kdims and set(data[c].dims) == set(virtual_dims):
@@ -169,7 +204,7 @@ def retrieve_unit_and_label(dim):
                 if not any(d.name == k or (isinstance(v, xr.DataArray) and d.name in v.dims)
                            for k, v in data.coords.items()):
                     not_found.append(d)
-        if not isinstance(data, xr.Dataset):
+        if not isinstance(data, (xr.Dataset, xr.DataArray)):
             raise TypeError('Data must be an xarray Dataset type.')
         elif not_found:
             raise DataError("xarray Dataset must define coordinates "
@@ -181,7 +216,16 @@ def retrieve_unit_and_label(dim):
 
     @classmethod
     def validate(cls, dataset, vdims=True):
-        Interface.validate(dataset, vdims)
+        import xarray as xr
+        if isinstance(dataset.data, xr.Dataset):
+            Interface.validate(dataset, vdims)
+        else:
+            not_found = [kd.name for kd in dataset.kdims if kd.name not in dataset.data.coords]
+            if not_found:
+                raise DataError("Supplied data does not contain specified "
+                                "dimensions, the following dimensions were "
+                                "not found: %s" % repr(not_found), cls)
+
         # Check whether irregular (i.e. multi-dimensional) coordinate
         # array dimensionality matches
         irregular = []
@@ -210,7 +255,10 @@ def range(cls, dataset, dimension):
             else:
                 dmin, dmax = np.nanmin(data), np.nanmax(data)
         else:
-            data = dataset.data[dim]
+            if cls.packed(dataset) and dim in dataset.vdims:
+                data = dataset.data.values[..., dataset.vdims.index(dim)]
+            else:
+                data = dataset.data[dim]
             if len(data):
                 dmin, dmax = data.min().data, data.max().data
             else:
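Packed value bands are plain NumPy slices of the underlying array; roughly what the packed branch reduces over (continuing the xarray sketch):

band = ds_xr.data.values[..., ds_xr.vdims.index('b')]  # raw (5, 10) NumPy block for 'b'
band.min(), band.max()                                 # the extremes for the 'b' range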
@@ -248,9 +296,6 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
 
         drop_dim = any(d not in group_kwargs['kdims'] for d in element_dims)
 
-        # XArray 0.7.2 does not support multi-dimensional groupby
-        # Replace custom implementation when
-        # https://github.com/pydata/xarray/pull/818 is merged.
         group_by = [d.name for d in index_dims]
         data = []
         if len(dimensions) == 1:
@@ -313,15 +358,22 @@ def coords(cls, dataset, dimension, ordered=False, expanded=False, edges=False):
     @classmethod
     def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False):
         dim = dataset.get_dimension(dim, strict=True)
-        data = dataset.data[dim.name].data
+        packed = cls.packed(dataset) and dim in dataset.vdims
+        if packed:
+            data = dataset.data.data[..., dataset.vdims.index(dim)]
+        else:
+            data = dataset.data[dim.name].data
         irregular = cls.irregular(dataset, dim) if dim in dataset.kdims else False
         irregular_kdims = [d for d in dataset.kdims if cls.irregular(dataset, d)]
         if irregular_kdims:
            virtual_coords = list(dataset.data[irregular_kdims[0].name].coords.dims)
         else:
             virtual_coords = []
         if dim in dataset.vdims or irregular:
-            data_coords = list(dataset.data[dim.name].dims)
+            if packed:
+                data_coords = list(dataset.data.dims)[:-1]
+            else:
+                data_coords = list(dataset.data[dim.name].dims)
             da = dask_array_module()
             if compute and da and isinstance(data, da.Array):
                 data = data.compute()
Expand All @@ -347,7 +399,7 @@ def unpack_scalar(cls, dataset, data):
Given a dataset object and data in the appropriate format for
the interface, return a simple scalar.
"""
if (len(data.data_vars) == 1 and
if (not cls.packed(dataset) and len(data.data_vars) == 1 and
len(data[dataset.vdims[0].name].shape) == 0):
return data[dataset.vdims[0].name].item()
return data
@@ -396,6 +448,11 @@ def ndloc(cls, dataset, indices):
                               for ind in adjusted_indices) and len(indices) == len(kdims))
         if sampled or (all_scalar and len(indices) == len(kdims)):
             import xarray as xr
+            if cls.packed(dataset):
+                selected = dataset.data.isel({k: xr.DataArray(v) for k, v in isel.items()})
+                df = selected.to_dataframe('vdims')[['vdims']].T
+                vdims = [vd.name for vd in dataset.vdims]
+                return df.rename(columns={i: d for i, d in enumerate(vdims)})[vdims]
             if all_scalar: isel = {k: [v] for k, v in isel.items()}
             selected = dataset.data.isel({k: xr.DataArray(v) for k, v in isel.items()})
             return selected.to_dataframe().reset_index()
@@ -422,8 +479,11 @@ def reindex(cls, dataset, kdims=None, vdims=None):
             if len(vals) == 1:
                 constant[kd.name] = vals[0]
         if len(constant) == len(dropped_kdims):
-            return dataset.data.sel(**{k: v for k, v in constant.items()
-                                       if k in dataset.data.dims})
+            dropped = dataset.data.sel(**{k: v for k, v in constant.items()
+                                          if k in dataset.data.dims})
+            if vdims and cls.packed(dataset):
+                return dropped.isel(**{dataset.data.dims[-1]: [dataset.vdims.index(vd) for vd in vdims]})
+            return dropped
         elif dropped_kdims:
             return tuple(dataset.columns(kdims+vdims).values())
         return dataset.data
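With packing, dropping value dimensions becomes an isel on the trailing band axis; e.g. (continuing the xarray sketch):

ds_b = ds_xr.reindex(vdims=['b'])  # packed path: isel keeps only band 1 on the last axis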