diff --git a/.travis.yml b/.travis.yml index 0b6d1edd..f446d60e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,10 +6,10 @@ os: env: global: - PYENV_VERSION=3.7 - - PYTHON_VERSION=3.6 - - PKG_TEST_PYTHON="--test-python=py36 --test-python=py27" - - CHANS_DEV="-c pyviz/label/dev -c bokeh/label/dev" - - CHANS_REL="-c pyviz -c bokeh" + - PYTHON_VERSION=3.7 + - PKG_TEST_PYTHON="--test-python=py37 --test-python=py27" + - CHANS_DEV="-c pyviz/label/dev" + - CHANS_REL="-c pyviz" - LABELS_DEV="--label dev" - LABELS_REL="--label dev --label main" - MPLBACKEND="Agg" diff --git a/geoviews/data/geom_dict.py b/geoviews/data/geom_dict.py index 8d76a102..5da766d1 100644 --- a/geoviews/data/geom_dict.py +++ b/geoviews/data/geom_dict.py @@ -3,6 +3,8 @@ import numpy as np from holoviews.core.data import Interface, DictInterface, MultiInterface +from holoviews.core.data.interface import DataError +from holoviews.core.data.spatialpandas import to_geom_dict from holoviews.core.dimension import OrderedDict as cyODict, dimension_name from holoviews.core.util import isscalar @@ -32,6 +34,24 @@ def init(cls, eltype, data, kdims, vdims): dimensions = [dimension_name(d) for d in kdims + vdims] if isinstance(data, geom_types): data = {'geometry': data} + elif not isinstance(data, dict) or 'geometry' not in data: + xdim, ydim = kdims[:2] + from shapely.geometry import ( + Point, LineString, Polygon, MultiPoint, MultiPolygon, + MultiLineString, LinearRing + ) + data = to_geom_dict(eltype, data, kdims, vdims, GeomDictInterface) + geom = data.get('geom_type') or MultiInterface.geom_type(eltype) + poly = 'holes' in data or geom == 'Polygon' + if poly: + single_type, multi_type = Polygon, MultiPolygon + elif geom == 'Line': + single_type, multi_type = LineString, MultiLineString + elif geom == 'Ring': + single_type, multi_type = LinearRing, MultiPolygon + else: + single_type, multi_type = Point, MultiPoint + data['geometry'] = geom_from_dict(data, xdim.name, ydim.name, single_type, multi_type) if 
not cls.applies(data): raise ValueError("GeomDictInterface only handles dictionary types " @@ -71,15 +91,43 @@ def init(cls, eltype, data, kdims, vdims): @classmethod def validate(cls, dataset, validate_vdims): - assert len([d for d in dataset.kdims + dataset.vdims - if d.name not in dataset.data]) == 2 + from shapely.geometry.base import BaseGeometry + geom_dims = cls.geom_dims(dataset) + if len(geom_dims) != 2: + raise DataError('Expected %s instance to declare two key ' + 'dimensions corresponding to the geometry ' + 'coordinates but %d dimensions were found ' + 'which did not refer to any columns.' + % (type(dataset).__name__, len(geom_dims)), cls) + elif 'geometry' not in dataset.data: + raise DataError("Could not find a 'geometry' column in the data.") + elif not isinstance(dataset.data['geometry'], BaseGeometry): + raise DataError("The 'geometry' column should be a shapely " + "geometry type, found %s type instead." % + type(dataset.data['geometry']).__name__) @classmethod - def dtype(cls, dataset, dimension): - name = dataset.get_dimension(dimension, strict=True).name - if name not in dataset.data: - return np.dtype('float') # Geometry dimension - return super(GeomDictInterface, cls).dtype(dataset, dimension) + def shape(cls, dataset): + return (cls.length(dataset), len(dataset.dimensions())) + + @classmethod + def geom_type(cls, dataset): + from shapely.geometry import ( + Polygon, MultiPolygon, LineString, MultiLineString, LinearRing + ) + geom = dataset.data['geometry'] + if isinstance(geom, (Polygon, MultiPolygon)): + return 'Polygon' + elif isinstance(geom, LinearRing): + return 'Ring' + elif isinstance(geom, (LineString, MultiLineString)): + return 'Line' + else: + return 'Point' + + @classmethod + def geo_column(cls, dataset): + return 'geometry' @classmethod def has_holes(cls, dataset): @@ -111,6 +159,13 @@ def dimension_type(cls, dataset, dim): values = dataset.data[name] return type(values) if isscalar(values) else values.dtype.type + @classmethod
+ def dtype(cls, dataset, dimension): + name = dataset.get_dimension(dimension, strict=True).name + if name in cls.geom_dims(dataset): + return np.dtype('float') + return Interface.dtype(dataset, dimension) + @classmethod def range(cls, dataset, dim): dim = dataset.get_dimension(dim) @@ -150,11 +205,82 @@ def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index @classmethod def select(cls, dataset, selection_mask=None, **selection): - raise NotImplementedError('select operation not implemented on geometries') + if cls.geom_dims(dataset): + data = cls.shape_mask(dataset, selection) + else: + data = dataset.data + if selection_mask is None: + selection_mask = cls.select_mask(dataset, selection) + empty = not selection_mask.sum() + dimensions = dataset.dimensions() + if empty: + return {d.name: np.array([], dtype=cls.dtype(dataset, d)) + for d in dimensions} + indexed = cls.indexed(dataset, selection) + new_data = {} + for k, v in data.items(): + if k not in dimensions or isscalar(v): + new_data[k] = v + else: + new_data[k] = v[selection_mask] + if indexed and len(list(new_data.values())[0]) == 1 and len(dataset.vdims) == 1: + value = new_data[dataset.vdims[0].name] + return value if isscalar(value) else value[0] + return new_data + + @classmethod + def shape_mask(cls, dataset, selection): + xdim, ydim = cls.geom_dims(dataset) + xsel = selection.pop(xdim.name, None) + ysel = selection.pop(ydim.name, None) + if xsel is None and ysel is None: + return dataset.data + + from shapely.geometry import box + + if xsel is None: + x0, x1 = cls.range(dataset, xdim) + elif isinstance(xsel, slice): + x0, x1 = xsel.start, xsel.stop + elif isinstance(xsel, tuple): + x0, x1 = xsel + else: + raise ValueError("Only slicing is supported on geometries, %s " + "selection is of type %s." 
+ % (xdim, type(xsel).__name__)) + + if ysel is None: + y0, y1 = cls.range(dataset, ydim) + elif isinstance(ysel, slice): + y0, y1 = ysel.start, ysel.stop + elif isinstance(ysel, tuple): + y0, y1 = ysel + else: + raise ValueError("Only slicing is supported on geometries, %s " + "selection is of type %s." + % (ydim, type(ysel).__name__)) + + bounds = box(x0, y0, x1, y1) + geom = dataset.data['geometry'] + geom = geom.intersection(bounds) + new_data = dict(dataset.data, geometry=geom) + return new_data @classmethod def iloc(cls, dataset, index): - raise NotImplementedError('iloc operation not implemented for geometries.') + from shapely.geometry import MultiPoint + rows, cols = index + + data = dict(dataset.data) + geom = data['geometry'] + + if isinstance(geom, MultiPoint): + if isscalar(rows) or isinstance(rows, slice): + geom = geom[rows] + elif isinstance(rows, (set, list)): + geom = MultiPoint([geom[r] for r in rows]) + data['geometry'] = geom + return data @classmethod def sample(cls, dataset, samples=[]): @@ -169,5 +295,56 @@ def concat(cls, datasets, dimensions, vdims): raise NotImplementedError('concat operation not implemented for geometries.') +def geom_from_dict(geom, xdim, ydim, single_type, multi_type): + from shapely.geometry import ( + Point, LineString, Polygon, MultiPoint, MultiPolygon, MultiLineString + ) + if (xdim, ydim) in geom: + xs, ys = np.asarray(geom.pop((xdim, ydim))).T + elif xdim in geom and ydim in geom: + xs, ys = geom.pop(xdim), geom.pop(ydim) + else: + raise ValueError('Could not find geometry dimensions') + + xscalar, yscalar = isscalar(xs), isscalar(ys) + if xscalar and yscalar: + xs, ys = np.array([xs]), np.array([ys]) + elif xscalar: + xs = np.full_like(ys, xs) + elif yscalar: + ys = np.full_like(xs, ys) + geom_array = np.column_stack([xs, ys]) + splits = np.where(np.isnan(geom_array[:, :2].astype('float')).sum(axis=1))[0] + if len(splits): + split_geoms = [g[:-1] if i < len(splits) else g + for i, g in
enumerate(np.split(geom_array, splits+1))] + else: + split_geoms = [geom_array] + split_holes = geom.pop('holes', None) + if split_holes is not None and len(split_holes) != len(split_geoms): + raise DataError('Polygons with holes containing multi-geometries ' + 'must declare a list of holes for each geometry.') + + if single_type is Point: + if len(splits) > 1 or any(len(g) > 1 for g in split_geoms): + geom = MultiPoint(np.concatenate(split_geoms)) + else: + geom = Point(*split_geoms[0]) + elif len(splits): + if multi_type is MultiPolygon: + if split_holes is None: + split_holes = [[]]*len(split_geoms) + geom = MultiPolygon(list(zip(split_geoms, split_holes))) + else: + geom = MultiLineString(split_geoms) + elif single_type is Polygon: + if split_holes is None or not len(split_holes): + split_holes = [None] + geom = Polygon(split_geoms[0], split_holes[0]) + else: + geom = LineString(split_geoms[0]) + return geom + + MultiInterface.subtypes.insert(0, 'geom_dictionary') Interface.register(GeomDictInterface) diff --git a/geoviews/data/geopandas.py b/geoviews/data/geopandas.py index 30632d2d..58752337 100644 --- a/geoviews/data/geopandas.py +++ b/geoviews/data/geopandas.py @@ -3,14 +3,20 @@ import sys import warnings +from collections import defaultdict + import numpy as np +from holoviews.core.util import isscalar, unique_iterator, unique_array, pd from holoviews.core.data import Dataset, Interface, MultiInterface from holoviews.core.data.interface import DataError +from holoviews.core.data import PandasInterface +from holoviews.core.data.spatialpandas import get_value_array from holoviews.core.dimension import dimension_name from holoviews.element import Path from ..util import geom_to_array, geom_types, geom_length +from .geom_dict import geom_from_dict class GeoPandasInterface(MultiInterface): @@ -29,8 +35,8 @@ def loaded(cls): def applies(cls, obj): if not cls.loaded(): return False - from geopandas import GeoDataFrame - return isinstance(obj, GeoDataFrame) + from 
geopandas import GeoDataFrame, GeoSeries + return isinstance(obj, (GeoDataFrame, GeoSeries)) @classmethod def geo_column(cls, data): @@ -39,32 +45,38 @@ def geo_column(cls, data): if col in data and isinstance(data[col], GeoSeries): return col cols = [c for c in data.columns if isinstance(data[c], GeoSeries)] - if not cols: + if not cols and len(data): raise ValueError('No geometry column found in geopandas.DataFrame, ' 'use the PandasInterface instead.') - return cols[0] + return cols[0] if cols else None @classmethod def init(cls, eltype, data, kdims, vdims): import pandas as pd from geopandas import GeoDataFrame, GeoSeries + if kdims is None: + kdims = eltype.kdims + + if vdims is None: + vdims = eltype.vdims + if isinstance(data, GeoSeries): data = data.to_frame() + if isinstance(data, list): if all(isinstance(d, geom_types) for d in data): data = [{'geometry': d} for d in data] - if all('geometry' in d and isinstance(d['geometry'], geom_types) for d in data): + if all(isinstance(d, dict) and 'geometry' in d and isinstance(d['geometry'], geom_types) + for d in data): data = GeoDataFrame(data) + if not isinstance(data, GeoDataFrame): + data = from_multi(eltype, data, kdims, vdims) elif not isinstance(data, GeoDataFrame): - raise ValueError("GeoPandasInterface only support geopandas DataFrames.") + raise ValueError("GeoPandasInterface only support geopandas " + "DataFrames not %s." 
% type(data)) elif 'geometry' not in data: cls.geo_column(data) - if kdims is None: - kdims = eltype.kdims - - if vdims is None: - vdims = eltype.vdims index_names = data.index.names if isinstance(data, pd.DataFrame) else [data.index.name] if index_names == [None]: @@ -79,7 +91,10 @@ def init(cls, eltype, data, kdims, vdims): data = data.reset_index() break - shp_types = {gt[5:] if 'Multi' in gt else gt for gt in data.geom_type} + try: + shp_types = {gt[5:] if 'Multi' in gt else gt for gt in data.geom_type} + except Exception: + shp_types = [] if len(shp_types) > 1: raise DataError('The GeopandasInterface can only read dataframes which ' 'share a common geometry type, found %s types.' % shp_types, @@ -104,7 +119,6 @@ def validate(cls, dataset, vdims=True): "dimensions, the following dimensions were " "not found: %s" % repr(not_found), cls) - @classmethod def dtype(cls, dataset, dimension): name = dataset.get_dimension(dimension, strict=True).name @@ -112,7 +126,6 @@ def dtype(cls, dataset, dimension): return np.dtype('float') # Geometry dimension return dataset.data[name].dtype - @classmethod def has_holes(cls, dataset): from shapely.geometry import Polygon, MultiPolygon @@ -140,7 +153,6 @@ def holes(cls, dataset): holes.append([[]]) return holes - @classmethod def select(cls, dataset, selection_mask=None, **selection): if cls.geom_dims(dataset): @@ -241,12 +253,19 @@ def dimension_type(cls, dataset, dim): return ds.interface.dimension_type(ds, dim) @classmethod - def isscalar(cls, dataset, dim): + def isscalar(cls, dataset, dim, per_geom=False): """ Tests if dimension is scalar in each subpath.
""" - idx = dataset.get_dimension_index(dim) - return idx not in [0, 1] + dim = dataset.get_dimension(dim) + geom_dims = cls.geom_dims(dataset) + if dim in geom_dims: + return False + elif per_geom: + return all(isscalar(v) or len(list(unique_array(v))) == 1 + for v in dataset.data[dim.name]) + dim = dataset.get_dimension(dim) + return len(dataset.data[dim.name].unique()) == 1 @classmethod def range(cls, dataset, dim): @@ -269,9 +288,23 @@ def aggregate(cls, columns, dimensions, function, **kwargs): raise NotImplementedError @classmethod - def groupby(cls, columns, dimensions, container_type, group_type, **kwargs): - from holoviews.core.data import PandasInterface - return PandasInterface.groupby(columns, dimensions, container_type, group_type, **kwargs) + def add_dimension(cls, dataset, dimension, dim_pos, values, vdim): + data = dataset.data.copy() + geom_col = cls.geo_column(dataset.data) + if dim_pos >= list(data.columns).index(geom_col): + dim_pos -= 1 + if dimension.name not in data: + data.insert(dim_pos, dimension.name, values) + return data + + @classmethod + def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs): + geo_dims = cls.geom_dims(dataset) + if any(d in geo_dims for d in dimensions): + raise DataError("GeoPandasInterface does not allow grouping " + "by geometry dimension.", cls) + + return PandasInterface.groupby(dataset, dimensions, container_type, group_type, **kwargs) @classmethod def reindex(cls, dataset, kdims=None, vdims=None): @@ -281,20 +314,25 @@ def reindex(cls, dataset, kdims=None, vdims=None): def sample(cls, columns, samples=[]): raise NotImplementedError + + @classmethod + def sort(cls, dataset, by=[], reverse=False): + geo_dims = cls.geom_dims(dataset) + if any(d in geo_dims for d in by): + raise DataError("GeoPandasInterface does not allow sorting " + "by geometry dimension.", cls) + return PandasInterface.sort(dataset, by, reverse) + @classmethod def shape(cls, dataset): - from holoviews.core.data import
PandasInterface - return PandasInterface.shape(dataset) + return (cls.length(dataset), len(dataset.dimensions())) @classmethod def length(cls, dataset): - from holoviews.core.data import PandasInterface - col = cls.geo_column(dataset.data) - length = sum([geom_length(g) for g in dataset.data[col]]) - geom_type = dataset.data.geom_type.iloc[0] + geom_type = cls.geom_type(dataset) if geom_type != 'Point': - length += (PandasInterface.length(dataset)-1) - return length + return len(dataset.data) + return sum([geom_length(g) for g in dataset.data.geometry]) @classmethod def nonzero(cls, dataset): @@ -302,7 +340,6 @@ def nonzero(cls, dataset): @classmethod def redim(cls, dataset, dimensions): - from holoviews.core.data import PandasInterface return PandasInterface.redim(dataset, dimensions) @classmethod @@ -310,11 +347,28 @@ def values(cls, dataset, dimension, expanded=True, flat=True, compute=True, keep dimension = dataset.get_dimension(dimension) geom_dims = dataset.interface.geom_dims(dataset) data = dataset.data - if dimension not in geom_dims and not expanded: - data = data[dimension.name] - return data if keep_index else data.values - elif not len(data): - return np.array([]) + isgeom = (dimension in geom_dims) + geom_col = cls.geo_column(dataset.data) + is_points = cls.geom_type(dataset) == 'Point' + if not len(data): + dtype = float if isgeom else dataset.data[dimension.name].dtype + return np.array([], dtype=dtype) + + col = cls.geo_column(dataset.data) + if isgeom and keep_index: + return data[col] + elif not isgeom: + return get_value_array(data, dimension, expanded, keep_index, + geom_col, is_points, geom_length) + column = data[dimension.name] + if not expanded or keep_index or not len(data): + return column if keep_index else column.values + else: + arrays = [] + for i, geom in enumerate(data[col]): + length = geom_length(geom) + arrays.append(np.full(length, column.iloc[i])) + return np.concatenate(arrays) if len(arrays) > 1 else arrays[0] values = [] 
geom_type = data.geom_type.iloc[0] @@ -322,16 +376,85 @@ def values(cls, dataset, dimension, expanded=True, flat=True, compute=True, keep for i, row in data.iterrows(): ds.data = row.to_dict() values.append(ds.interface.values(ds, dimension)) - if geom_type != 'Point': + if 'Point' not in geom_type and expanded: values.append([np.NaN]) - values = values if geom_type == 'Point' else values[:-1] + values = values if 'Point' in geom_type or not expanded else values[:-1] if len(values) == 1: return values[0] elif not values: return np.array([]) + elif not expanded: + array = np.empty(len(values), dtype=object) + array[:] = values + return array else: return np.concatenate(values) + @classmethod + def iloc(cls, dataset, index): + from geopandas import GeoSeries + from shapely.geometry import MultiPoint + rows, cols = index + geom_dims = cls.geom_dims(dataset) + geom_col = cls.geo_column(dataset.data) + scalar = False + columns = list(dataset.data.columns) + if isinstance(cols, slice): + cols = [d.name for d in dataset.dimensions()][cols] + elif np.isscalar(cols): + scalar = np.isscalar(rows) + cols = [dataset.get_dimension(cols).name] + else: + cols = [dataset.get_dimension(d).name for d in index[1]] + if not all(d in cols for d in geom_dims): + raise DataError("Cannot index a dimension which is part of the " + "geometry column of a geopandas GeoDataFrame.", cls) + cols = list(unique_iterator([ + columns.index(geom_col) if c in geom_dims else columns.index(c) for c in cols + ])) + + geom_type = dataset.data[geom_col].geom_type.iloc[0] + if geom_type != 'MultiPoint': + if scalar: + return dataset.data.iloc[rows, cols[0]] + elif isscalar(rows): + rows = [rows] + return dataset.data.iloc[rows, cols] + + geoms = dataset.data[geom_col] + count = 0 + new_geoms, indexes = [], [] + for i, geom in enumerate(geoms): + length = len(geom) + if np.isscalar(rows): + if count <= rows < (count+length): + new_geoms.append(geom[rows-count]) + indexes.append(i) + break + elif
isinstance(rows, slice): + if rows.start is not None and rows.start > (count+length): + continue + elif rows.stop is not None and rows.stop < count: + break + start = None if rows.start is None else max(rows.start - count, 0) + stop = None if rows.stop is None else min(rows.stop - count, length) + if rows.step is not None: + dataset.param.warning(".iloc step slicing currently not supported for " + "the multi-tabular data format.") + indexes.append(i) + new_geoms.append(geom[start:stop]) + elif isinstance(rows, (list, set)): + sub_rows = [(r-count) for r in rows if count <= r < (count+length)] + if not sub_rows: + continue + indexes.append(i) + new_geoms.append(MultiPoint([geom[r] for r in sub_rows])) + count += length + + new = dataset.data.iloc[indexes].copy() + new[geom_col] = GeoSeries(new_geoms) + return new + @classmethod def split(cls, dataset, start, end, datatype, **kwargs): objs = [] @@ -343,22 +466,26 @@ def split(cls, dataset, start, end, datatype, **kwargs): arr = geom_to_array(row[col]) d = {(xdim.name, ydim.name): arr} d.update({vd.name: row[vd.name] for vd in dataset.vdims}) - ds = dataset.clone(d, datatype=['dictionary']) + geom_type = cls.geom_type(dataset) + ds = dataset.clone([d], datatype=['multitabular']) for i, row in dataset.data.iterrows(): if datatype == 'geom': objs.append(row[col]) continue geom = row[col] + gt = geom_type or get_geom_type(geom) + arr = geom_to_array(geom) d = {xdim.name: arr[:, 0], ydim.name: arr[:, 1]} d.update({vd.name: row[vd.name] for vd in dataset.vdims}) - ds.data = d + ds.data = [d] if datatype == 'array': obj = ds.array(**kwargs) elif datatype == 'dataframe': obj = ds.dframe(**kwargs) - elif datatype == 'columns': - obj = ds.columns(**kwargs) + elif datatype in ('columns', 'dictionary'): + d['geom_type'] = gt + obj = d elif datatype is None: obj = ds.clone() else: @@ -367,6 +494,114 @@ def split(cls, dataset, start, end, datatype, **kwargs): return objs +def get_geom_type(geom): + """Returns the HoloViews geometry
type. + + Args: + geom: A shapely geometry + + Returns: + A string representing type of the geometry. + """ + from shapely.geometry import ( + Point, LineString, Polygon, LinearRing, MultiPoint, MultiPolygon, MultiLineString + ) + if isinstance(geom, (Point, MultiPoint)): + return 'Point' + elif isinstance(geom, LinearRing): + return 'Ring' + elif isinstance(geom, (LineString, MultiLineString)): + return 'Line' + elif isinstance(geom, (Polygon, MultiPolygon)): + return 'Polygon' + + +def to_geopandas(data, xdim, ydim, columns=[], geom='point'): + """Converts list of dictionary format geometries to a geopandas.GeoDataFrame. + + Args: + data: List of dictionaries representing individual geometries + xdim: Name of x-coordinates column + ydim: Name of y-coordinates column + geom: The geometry type, 'Polygon' or 'Line'; anything else is treated as points + + Returns: + A geopandas.GeoDataFrame version of the data + """ + from geopandas import GeoDataFrame + from shapely.geometry import ( + Point, LineString, Polygon, MultiPoint, MultiPolygon, MultiLineString + ) + poly = any('holes' in d for d in data) or geom == 'Polygon' + if poly: + single_type, multi_type = Polygon, MultiPolygon + elif geom == 'Line': + single_type, multi_type = LineString, MultiLineString + else: + single_type, multi_type = Point, MultiPoint + + converted = defaultdict(list) + for geom_dict in data: + geom_dict = dict(geom_dict) + geom = geom_from_dict(geom_dict, xdim, ydim, single_type, multi_type) + for c, v in geom_dict.items(): + converted[c].append(v) + converted['geometry'].append(geom) + + return GeoDataFrame(converted, columns=['geometry']+columns) + + +def from_multi(eltype, data, kdims, vdims): + """Converts list formats into geopandas.GeoDataFrame. + + Args: + eltype: Element type to convert + data: The original data + kdims: The declared key dimensions + vdims: The declared value dimensions + + Returns: + A GeoDataFrame containing the data in the list based format.
+ """ + + from geopandas import GeoDataFrame + + new_data = [] + types = [] + xname, yname = (kd.name for kd in kdims[:2]) + for d in data: + types.append(type(d)) + if isinstance(d, dict): + d = {k: v if isscalar(v) else np.asarray(v) for k, v in d.items()} + new_data.append(d) + continue + new_el = eltype(d, kdims, vdims) + if new_el.interface is GeoPandasInterface: + types[-1] = GeoDataFrame + new_data.append(new_el.data) + continue + new_dict = {} + for d in new_el.dimensions(): + if d in (xname, yname): + scalar = False + else: + scalar = new_el.interface.isscalar(new_el, d) + vals = new_el.dimension_values(d, not scalar) + new_dict[d.name] = vals[0] if scalar else vals + new_data.append(new_dict) + if len(set(types)) > 1: + raise DataError('Mixed types not supported') + if new_data and types[0] is GeoDataFrame: + data = pd.concat(new_data) + else: + columns = [d.name for d in kdims+vdims if d not in (xname, yname)] + geom = GeoPandasInterface.geom_type(eltype) + if not len(data): + return GeoDataFrame([], columns=['geometry']+columns) + data = to_geopandas(new_data, xname, yname, columns, geom) + return data + + Interface.register(GeoPandasInterface) -Dataset.datatype = ['geodataframe']+Dataset.datatype -Path.datatype = ['geodataframe']+Path.datatype +Dataset.datatype = Dataset.datatype+['geodataframe'] +Path.datatype = Path.datatype+['geodataframe'] diff --git a/geoviews/tests/data/testgeopandasinterface.py b/geoviews/tests/data/testgeopandasinterface.py new file mode 100644 index 00000000..478770d8 --- /dev/null +++ b/geoviews/tests/data/testgeopandasinterface.py @@ -0,0 +1,177 @@ +""" +Test for the GeoPandasInterface +""" +from unittest import SkipTest + +import numpy as np + +from shapely import geometry as sgeom + +try: + import geopandas + from geopandas.array import GeometryDtype +except: + geopandas = None + +from holoviews.core.util import pd +from holoviews.core.data import Dataset +from holoviews.core.data.interface import DataError +from 
holoviews.element import Polygons, Path, Points +from holoviews.element.comparison import ComparisonTestCase +from holoviews.tests.core.data.testmultiinterface import GeomTests + +from geoviews.data import GeoPandasInterface + +from .testmultigeominterface import GeomInterfaceTest + + +class RoundTripTests(ComparisonTestCase): + + datatype = None + + interface = None + + __test__ = False + + def test_point_roundtrip(self): + points = Points([{'x': 0, 'y': 1, 'z': 0}, + {'x': 1, 'y': 0, 'z': 1}], ['x', 'y'], + 'z', datatype=[self.datatype]) + self.assertIsInstance(points.data.geometry.dtype, GeometryDtype) + roundtrip = points.clone(datatype=['multitabular']) + self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Points([{'x': 0, 'y': 1, 'z': 0}, + {'x': 1, 'y': 0, 'z': 1}], ['x', 'y'], + 'z', datatype=['multitabular']) + self.assertEqual(roundtrip, expected) + + def test_multi_point_roundtrip(self): + xs = [1, 2, 3, 2] + ys = [2, 0, 7, 4] + points = Points([{'x': xs, 'y': ys, 'z': 0}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 1}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIsInstance(points.data.geometry.dtype, GeometryDtype) + roundtrip = points.clone(datatype=['multitabular']) + self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Points([{'x': xs, 'y': ys, 'z': 0}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 1}], + ['x', 'y'], 'z', datatype=['multitabular']) + self.assertEqual(roundtrip, expected) + + def test_line_roundtrip(self): + xs = [1, 2, 3] + ys = [2, 0, 7] + path = Path([{'x': xs, 'y': ys, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIsInstance(path.data.geometry.dtype, GeometryDtype) + roundtrip = path.clone(datatype=['multitabular']) + self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Path([{'x': xs, 'y': ys, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}], + ['x', 'y'], 'z', datatype=['multitabular']) + 
self.assertEqual(roundtrip, expected) + + def test_multi_line_roundtrip(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + path = Path([{'x': xs, 'y': ys, 'z': 0}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 1}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIsInstance(path.data.geometry.dtype, GeometryDtype) + roundtrip = path.clone(datatype=['multitabular']) + self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Path([{'x': xs, 'y': ys, 'z': 0}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 1}], + ['x', 'y'], 'z', datatype=['multitabular']) + self.assertEqual(roundtrip, expected) + + def test_polygon_roundtrip(self): + xs = [1, 2, 3] + ys = [2, 0, 7] + poly = Polygons([{'x': xs, 'y': ys, 'z': 0}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 1}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIsInstance(poly.data.geometry.dtype, GeometryDtype) + roundtrip = poly.clone(datatype=['multitabular']) + self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Polygons([{'x': xs+[1], 'y': ys+[2], 'z': 0}, + {'x': xs[::-1]+[3], 'y': ys[::-1]+[7], 'z': 1}], + ['x', 'y'], 'z', datatype=['multitabular']) + self.assertEqual(roundtrip, expected) + + def test_multi_polygon_roundtrip(self): + xs = [1, 2, 3, np.nan, 6, 7, 3] + ys = [2, 0, 7, np.nan, 7, 5, 2] + holes = [ + [[(1.5, 2), (2, 3), (1.6, 1.6)], [(2.1, 4.5), (2.5, 5), (2.3, 3.5)]], + [] + ] + poly = Polygons([{'x': xs, 'y': ys, 'holes': holes, 'z': 1}, + {'x': xs[::-1], 'y': ys[::-1], 'z': 2}], + ['x', 'y'], 'z', datatype=[self.datatype]) + self.assertIsInstance(poly.data.geometry.dtype, GeometryDtype) + roundtrip = poly.clone(datatype=['multitabular']) + self.assertEqual(roundtrip.interface.datatype, 'multitabular') + expected = Polygons([{'x': [1, 2, 3, 1, np.nan, 6, 7, 3, 6], + 'y': [2, 0, 7, 2, np.nan, 7, 5, 2, 7], 'holes': holes, 'z': 1}, + {'x': [3, 7, 6, 3, np.nan, 3, 2, 1, 3], + 'y': [2, 5, 7, 2, np.nan, 7, 0, 2, 7], 'z': 2}], + ['x', 'y'], 'z', 
datatype=['multitabular']) + self.assertEqual(roundtrip, expected) + + + +class GeoPandasInterfaceTest(GeomInterfaceTest, GeomTests, RoundTripTests): + """ + Test of the GeoPandasInterface. + """ + + datatype = 'geodataframe' + interface = GeoPandasInterface + + __test__ = True + + def setUp(self): + if geopandas is None: + raise SkipTest('GeoPandasInterface requires geopandas, skipping tests') + super(GeoPandasInterfaceTest, self).setUp() + + def test_df_dataset(self): + if not pd: + raise SkipTest('Pandas not available') + dfs = [pd.DataFrame(np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]), columns=['x', 'y']) + for i in range(2)] + mds = Path(dfs, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + for i, ds in enumerate(mds.split(datatype='dataframe')): + ds['x'] = ds.x.astype(int) + ds['y'] = ds.y.astype(int) + self.assertEqual(ds, dfs[i]) + + def test_multi_geom_point_coord_values(self): + geoms = [{'geometry': sgeom.Point([(0, 1)])}, + {'geometry': sgeom.Point([(3, 5)])}] + mds = Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertEqual(mds.dimension_values('x'), np.array([0, 3])) + self.assertEqual(mds.dimension_values('y'), np.array([1, 5])) + + def test_multi_geom_point_length(self): + geoms = [{'geometry': sgeom.Point([(0, 0)])}, + {'geometry': sgeom.Point([(3, 3)])}] + mds = Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertEqual(len(mds), 2) + + def test_array_points_iloc_index_rows_index_cols(self): + arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i in range(2)] + mds = Dataset(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + with self.assertRaises(DataError): + mds.iloc[3, 0] + + def test_polygon_dtype(self): + poly = Polygons([{'x': [1, 2, 3], 'y': [2, 0, 7]}], datatype=[self.datatype]) + self.assertIs(poly.interface, self.interface) + self.assertEqual(poly.interface.dtype(poly, 'x'), + 'float64') 
diff --git a/geoviews/tests/data/testmultigeominterface.py b/geoviews/tests/data/testmultigeominterface.py new file mode 100644 index 00000000..cb0a34df --- /dev/null +++ b/geoviews/tests/data/testmultigeominterface.py @@ -0,0 +1,183 @@ +""" +Test for the MultiInterface and GeomDictInterface +""" +from unittest import SkipTest + +import numpy as np + +from holoviews.core.data import Dataset, MultiInterface +from holoviews.core.data.interface import DataError +from holoviews.core.util import pd +from holoviews.element import Polygons, Path +from holoviews.element.comparison import ComparisonTestCase +from holoviews.tests.core.data.testmultiinterface import MultiBaseInterfaceTest + +try: + from shapely import geometry as sgeom +except: + sgeom = None + +try: + import spatialpandas +except: + spatialpandas = None + +from geoviews.data.geom_dict import GeomDictInterface + + +class GeomInterfaceTest(ComparisonTestCase): + """ + Test for the MultiInterface and GeomDictInterface. + """ + + __test__ = False + + def setUp(self): + if sgeom is None: + raise SkipTest('GeomInterfaceTest requires shapely, skipping tests') + super(GeomInterfaceTest, self).setUp() + + def test_multi_geom_dataset_geom_list_constructor(self): + geoms = [sgeom.Polygon([(0, 0), (3, 3), (6, 0)])] + Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + + def test_multi_geom_dataset_geom_dict_constructor(self): + geoms = [{'geometry': sgeom.Polygon([(0, 0), (3, 3), (6, 0)])}] + Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + + def test_multi_geom_dataset_geom_dict_constructor_extra_kdim(self): + geoms = [{'geometry': sgeom.Polygon([(0, 0), (3, 3), (6, 0)]), 'z': 1}] + Dataset(geoms, kdims=['x', 'y', 'z'], datatype=[self.datatype]) + + def test_multi_geom_dataset_poly_coord_values(self): + geoms = [sgeom.Polygon([(0, 0), (6, 6), (3, 3)])] + mds = Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertEqual(mds.dimension_values('x'), np.array([0, 6, 3, 0])) + 
self.assertEqual(mds.dimension_values('y'), np.array([0, 6, 3, 0])) + + def test_multi_geom_dataset_poly_scalar_values(self): + geoms = [{'geometry': sgeom.Polygon([(0, 0), (3, 3), (6, 0)]), 'z': 1}] + mds = Dataset(geoms, kdims=['x', 'y', 'z'], datatype=[self.datatype]) + self.assertEqual(mds.dimension_values('z'), np.array([1, 1, 1, 1])) + self.assertEqual(mds.dimension_values('z', expanded=False), np.array([1])) + + def test_multi_geom_poly_length(self): + geoms = [{'geometry': sgeom.Polygon([(0, 0), (3, 3), (6, 0)])}, + {'geometry': sgeom.Polygon([(3, 3), (9, 3), (6, 0)])}] + mds = Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertEqual(len(mds), 2) + + def test_multi_geom_poly_range(self): + geoms = [{'geometry': sgeom.Polygon([(0, 0), (3, 3), (6, 0)])}, + {'geometry': sgeom.Polygon([(3, 3), (9, 3), (6, 0)])}] + mds = Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertEqual(mds.range('x'), (0, 9)) + self.assertEqual(mds.range('y'), (0, 3)) + + def test_multi_geom_dataset_line_string_coord_values(self): + geoms = [sgeom.LineString([(0, 0), (3, 3), (6, 0)])] + mds = Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertEqual(mds.dimension_values('x'), np.array([0, 3, 6])) + self.assertEqual(mds.dimension_values('y'), np.array([0, 3, 0])) + + def test_multi_geom_dataset_line_string_scalar_values(self): + geoms = [{'geometry': sgeom.LineString([(0, 0), (3, 3), (6, 0)]), 'z': 1}] + mds = Dataset(geoms, kdims=['x', 'y', 'z'], datatype=[self.datatype]) + self.assertEqual(mds.dimension_values('z'), np.array([1, 1, 1])) + self.assertEqual(mds.dimension_values('z', expanded=False), np.array([1])) + + def test_multi_geom_line_string_length(self): + geoms = [{'geometry': sgeom.LineString([(0, 0), (3, 3), (6, 0)])}, + {'geometry': sgeom.LineString([(3, 3), (9, 3), (6, 0)])}] + mds = Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertEqual(len(mds), 2) + + def 
test_multi_geom_point_length(self): + geoms = [{'geometry': sgeom.Point([(0, 0)])}, + {'geometry': sgeom.Point([(3, 3)])}] + mds = Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertEqual(len(mds), 2) + + def test_multi_geom_point_coord_values(self): + geoms = [{'geometry': sgeom.Point([(0, 1)])}, + {'geometry': sgeom.Point([(3, 5)])}] + mds = Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertEqual(mds.dimension_values('x'), np.array([0, 3])) + self.assertEqual(mds.dimension_values('y'), np.array([1, 5])) + + def test_multi_geom_point_coord_range(self): + geoms = [{'geometry': sgeom.Point([(0, 1)])}, + {'geometry': sgeom.Point([(3, 5)])}] + mds = Dataset(geoms, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertEqual(mds.range('x'), (0, 3)) + self.assertEqual(mds.range('y'), (1, 5)) + + def test_multi_dict_groupby(self): + geoms = [{'geometry': sgeom.Polygon([(2, 0), (1, 2), (0, 0)]), 'z': 1}, + {'geometry': sgeom.Polygon([(3, 3), (3, 3), (6, 0)]), 'z': 2}] + mds = Dataset(geoms, kdims=['x', 'y', 'z'], datatype=[self.datatype]) + for i, (k, ds) in enumerate(mds.groupby('z').items()): + self.assertEqual(k, geoms[i]['z']) + self.assertEqual(ds, Dataset([geoms[i]], kdims=['x', 'y'])) + + +class MultiGeomInterfaceTest(GeomInterfaceTest): + + datatype = 'multitabular' + interface = GeomDictInterface + + __test__ = True + + +class SpatialPandasGeomInterfaceTest(GeomInterfaceTest): + + datatype = 'spatialpandas' + + __test__ = True + + def setUp(self): + if spatialpandas is None: + raise SkipTest('SpatialPandasInterface requires spatialpandas, skipping tests') + super(SpatialPandasGeomInterfaceTest, self).setUp() + + +class MultiGeomDictInterfaceTest(MultiBaseInterfaceTest): + + datatype = 'multitabular' + interface = MultiInterface + subtype = 'geom_dictionary' + + __test__ = True + + def test_dict_dataset(self): + dicts = [{'x': np.arange(i, i+2), 'y': np.arange(i, i+2)} for i in range(2)] + mds = Path(dicts, kdims=['x', 
'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + for i, cols in enumerate(mds.split(datatype='columns')): + self.assertEqual(dict(cols), dict(dicts[i], geom_type='Line', + geometry=mds.data[i]['geometry'])) + + def test_polygon_dtype(self): + poly = Polygons([{'x': [1, 2, 3], 'y': [2, 0, 7]}], datatype=[self.datatype]) + self.assertIs(poly.interface, self.interface) + self.assertEqual(poly.interface.dtype(poly, 'x'), + 'float64') + + def test_array_points_iloc_index_rows_index_cols(self): + arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i in range(2)] + mds = Dataset(arrays, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + with self.assertRaises(DataError): + mds.iloc[3, 0] + + def test_df_dataset(self): + if not pd: + raise SkipTest('Pandas not available') + dfs = [pd.DataFrame(np.column_stack([np.arange(i, i+2), np.arange(i, i+2)]), columns=['x', 'y']) + for i in range(2)] + mds = Path(dfs, kdims=['x', 'y'], datatype=[self.datatype]) + self.assertIs(mds.interface, self.interface) + for i, ds in enumerate(mds.split(datatype='dataframe')): + ds['x'] = ds.x.astype(int) + ds['y'] = ds.y.astype(int) + self.assertEqual(ds, dfs[i]) diff --git a/geoviews/util.py b/geoviews/util.py index eeac3091..d8e92cc9 100644 --- a/geoviews/util.py +++ b/geoviews/util.py @@ -314,15 +314,17 @@ def geom_to_arr(geom): xy = getattr(geom, 'xy', None) except NotImplementedError: xy = None + if xy is not None: return np.column_stack(xy) if hasattr(geom, 'array_interface'): data = geom.array_interface() return np.array(data['data']).reshape(data['shape'])[:, :2] arr = geom.array_interface_base['data'] + if (len(arr) % 2) != 0: arr = arr[:-1] - return np.array(arr).reshape(int(len(arr)/2), 2) + return np.array(arr).reshape(-1, 2) def geom_length(geom): @@ -336,9 +338,13 @@ def geom_length(geom): if not geom.geom_type.startswith('Multi') and hasattr(geom, 'array_interface_base'): return 
len(geom.array_interface_base['data'])//2 else: + glength = len(geom) length = 0 - for g in geom: + for i, g in enumerate(geom): length += geom_length(g) + if 'Point' not in geom.geom_type and (i+1 != glength): + length += 1 + return length @@ -352,18 +358,19 @@ def geom_to_array(geom): xs = np.array(geom.exterior.coords.xy[0]) ys = np.array(geom.exterior.coords.xy[1]) elif geom.geom_type in ('LineString', 'LinearRing'): - arr = geom_to_arr(geom) - return arr + return geom_to_arr(geom) + elif geom.geom_type == 'MultiPoint': + arrays = [] + for g in geom: + if g.geom_type == 'Point': + arrays.append(np.array(g.xy).T) + return np.concatenate(arrays) if arrays else np.array([]) else: - xs, ys = [], [] + arrays = [] for g in geom: - arr = geom_to_arr(g) - xs.append(arr[:, 0]) - ys.append(arr[:, 1]) - xs.append([np.NaN]) - ys.append([np.NaN]) - xs = np.concatenate(xs[:-1]) if xs else np.array([]) - ys = np.concatenate(ys[:-1]) if ys else np.array([]) + arrays.append(geom_to_arr(g)) + arrays.append(np.array([[np.nan, np.nan]])) + return np.concatenate(arrays[:-1]) if arrays else np.array([]) return np.column_stack([xs, ys])