diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..c98a765 --- /dev/null +++ b/NOTICE @@ -0,0 +1,24 @@ +This work includes components that were originally developed in the fletcher library by +Uwe L. Korn which is distributed under the following licence: + +MIT License + +Copyright (c) 2018 Uwe L. Korn + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/setup.py b/setup.py index 4d75620..e6791ca 100644 --- a/setup.py +++ b/setup.py @@ -2,5 +2,5 @@ setup(name='spatialpandas', packages=find_packages(exclude=('tests',)), - install_requires=['pandas', 'dask', 'numba', 'numpy'], + install_requires=['pandas', 'dask', 'numba', 'numpy', 'pyarrow>=0.15'], tests_require=['pytest', 'hypothesis']) diff --git a/spatialpandas/__init__.py b/spatialpandas/__init__.py index e69de29..ce44773 100644 --- a/spatialpandas/__init__.py +++ b/spatialpandas/__init__.py @@ -0,0 +1,2 @@ +from . import geometry +from . import spatialindex diff --git a/spatialpandas/geometry/__init__.py b/spatialpandas/geometry/__init__.py new file mode 100644 index 0000000..73f0e45 --- /dev/null +++ b/spatialpandas/geometry/__init__.py @@ -0,0 +1,7 @@ +from .polygon import Polygon, PolygonArray, PolygonDtype +from .multipolygon import MultiPolygon, MultiPolygonArray, MultiPolygonDtype +from .line import Line, LineArray, LineDtype +from .multiline import MultiLine, MultiLineArray, MultiLineDtype +from .multipoint import MultiPoint, MultiPointArray, MultiPointDtype +from .ring import Ring, RingArray, RingDtype +from .base import Geometry, GeometryArray, GeometryDtype diff --git a/spatialpandas/geometry/_algorithms.py b/spatialpandas/geometry/_algorithms.py new file mode 100644 index 0000000..f408b3a --- /dev/null +++ b/spatialpandas/geometry/_algorithms.py @@ -0,0 +1,243 @@ +from math import sqrt + +import numpy as np +from numba import jit, prange + +from spatialpandas.utils import ngjit + + +@ngjit +def bounds_interleaved(values): + """ + compute bounds + """ + xmin = np.inf + ymin = np.inf + xmax = -np.inf + ymax = -np.inf + + for i in range(0, len(values), 2): + x = values[i] + if np.isfinite(x): + xmin = min(xmin, x) + xmax = max(xmax, x) + + y = values[i + 1] + if np.isfinite(y): + ymin = min(ymin, y) + ymax = max(ymax, y) + + return (xmin, ymin, xmax, ymax) + + +@ngjit +def bounds_interleaved_1d(values, offset): + """ + compute bounds + """ + vmin = np.inf + vmax = -np.inf + + for i in range(0, len(values), 2): + v = values[i + offset] + if np.isfinite(v): + vmin = min(vmin, v) + vmax = max(vmax, v) + + return (vmin, vmax) + + +@ngjit +def compute_line_length(values, value_offsets): + total_len = 0.0 + for offset_ind in range(len(value_offsets) - 1): + start = value_offsets[offset_ind] + stop = value_offsets[offset_ind + 1] + x0 = values[start] + y0 = values[start + 1] + + for i in range(start + 2, stop, 2): + x1 = values[i] + y1 = values[i + 1] + + if (np.isfinite(x0) and np.isfinite(y0) and + np.isfinite(x1) and np.isfinite(y1)): + total_len += sqrt((x1 - x0) ** 2 + (y1 - y0) ** 2) + + x0 = x1 + y0 = y1 + + return total_len + + +@ngjit +def compute_area(values, value_offsets): + area = 0.0 + + for offset_ind in range(len(value_offsets) - 1): + start = value_offsets[offset_ind] + stop = value_offsets[offset_ind + 1] + poly_length = stop - start + + if poly_length < 6: + # A degenerate polygon, zero area + continue + + for k in range(start, stop - 4, 2): + i, j = k + 2, k + 4 + ix = values[i] + jy = values[j + 1] + ky = values[k + 1] + + area += ix * (jy - ky) + + # wrap-around term for polygon + firstx = values[start] + secondy = values[start + 3] + lasty = values[stop - 3] + area += firstx * (secondy - lasty) + + return area / 2.0 + + +@jit(nogil=True, nopython=True, parallel=True) +def geometry_map_nested1( + fn, result, result_offset, values, value_offsets, missing +): + assert len(value_offsets) == 1 + value_offsets0 = value_offsets[0] + n = len(value_offsets0) - 1 + for i in prange(n): + if not missing[i]: + result[i + result_offset] = fn(values, value_offsets0[i:i + 2]) + + +@jit(nogil=True, nopython=True, parallel=True) +def geometry_map_nested2( + fn, result, result_offset, values, value_offsets, missing +): + assert len(value_offsets) == 2 + value_offsets0 = value_offsets[0] + value_offsets1 = value_offsets[1] + n = len(value_offsets0) - 1 + for i in prange(n): + if not missing[i]: + start = value_offsets0[i] + stop = value_offsets0[i + 1] + result[i + result_offset] = fn(values, value_offsets1[start:stop + 1]) + + +@jit(nogil=True, nopython=True, parallel=True) +def geometry_map_nested3( + fn, result, result_offset, values, value_offsets, missing +): + assert len(value_offsets) == 3 + value_offsets0 = value_offsets[0] + value_offsets1 = value_offsets[1] + value_offsets2 = value_offsets[2] + n = len(value_offsets0) - 1 + for i in prange(n): + if not missing[i]: + start = value_offsets1[value_offsets0[i]] + stop = value_offsets1[value_offsets0[i + 1]] + result[i + result_offset] = fn(values, value_offsets2[start:stop + 1]) + + +@jit(nopython=True, nogil=True) +def _lexographic_lt0(a1, a2): + """ + Compare two 1D numpy arrays lexographically + Parameters + ---------- + a1: ndarray + 1D numpy array + a2: ndarray + 1D numpy array + + Returns + ------- + comparison: + True if a1 < a2, False otherwise + """ + for e1, e2 in zip(a1, a2): + if e1 < e2: + return True + elif e1 > e2: + return False + return len(a1) < len(a2) + + +def _lexographic_lt(a1, a2): + if a1.dtype != np.object and a1.dtype != np.object: + # a1 and a2 primitive + return _lexographic_lt0(a1, a2) + elif a1.dtype == np.object and a1.dtype == np.object: + # a1 and a2 object, process recursively + for e1, e2 in zip(a1, a2): + if _lexographic_lt(e1, e2): + return True + elif _lexographic_lt(e2, e1): + return False + return len(a1) < len(a2) + elif a1.dtype != np.object: + # a2 is object array, a1 primitive + return True + else: + # a1 is object array, a2 primitive + return False + + +@ngjit +def _extract_isnull_bytemap(bitmap, bitmap_length, bitmap_offset, dst_offset, dst): + """ + Note: Copied from fletcher: See NOTICE for license info + + (internal) write the values of a valid bitmap as bytes to a pre-allocatored + isnull bytemap. + + Parameters + ---------- + bitmap: pyarrow.Buffer + bitmap where a set bit indicates that a value is valid + bitmap_length: int + Number of bits to read from the bitmap + bitmap_offset: int + Number of bits to skip from the beginning of the bitmap. + dst_offset: int + Number of bytes to skip from the beginning of the output + dst: numpy.array(dtype=bool) + Pre-allocated numpy array where a byte is set when a value is null + """ + for i in range(bitmap_length): + idx = bitmap_offset + i + byte_idx = idx // 8 + bit_mask = 1 << (idx % 8) + dst[dst_offset + i] = (bitmap[byte_idx] & bit_mask) == 0 + + +def extract_isnull_bytemap(list_array): + """ + Note: Copied from fletcher: See NOTICE for license info + + Extract the valid bitmaps of a chunked array into numpy isnull bytemaps. + + Parameters + ---------- + chunked_array: pyarrow.ChunkedArray + + Returns + ------- + valid_bytemap: numpy.array + """ + result = np.zeros(len(list_array), dtype=bool) + + offset = 0 + chunk = list_array + valid_bitmap = chunk.buffers()[0] + if valid_bitmap: + buf = memoryview(valid_bitmap) + _extract_isnull_bytemap(buf, len(chunk), chunk.offset, offset, result) + else: + return np.full(len(list_array), False) + + return result diff --git a/spatialpandas/geometry/base.py b/spatialpandas/geometry/base.py new file mode 100644 index 0000000..04bd93c --- /dev/null +++ b/spatialpandas/geometry/base.py @@ -0,0 +1,636 @@ +from functools import total_ordering +from numbers import Integral +from typing import Iterable + +import pyarrow as pa +import numpy as np +import re + +from pandas.api.types import is_array_like +from pandas.api.extensions import ( + ExtensionArray, ExtensionDtype, register_extension_dtype +) + +from spatialpandas.geometry._algorithms import ( + bounds_interleaved, bounds_interleaved_1d, _lexographic_lt, + extract_isnull_bytemap) + +try: + import shapely.geometry as sg +except ImportError: + sg = None + + +def _validate_nested_arrow_type(nesting_levels, pyarrow_type): + if pyarrow_type == pa.null(): + return pa.null() + + pyarrow_element_type = pyarrow_type + for i in range(nesting_levels): + if not isinstance(pyarrow_element_type, pa.ListType): + raise ValueError( + "Expected input data to have {} nested layer(s)".format( + nesting_levels) + ) + pyarrow_element_type = pyarrow_element_type.value_type + pyarrow_element_type = pyarrow_element_type + numpy_element_dtype = pyarrow_element_type.to_pandas_dtype() + if (numpy_element_dtype() is None + or numpy_element_dtype().dtype.kind not in ('i', 'u', 'f')): + raise ValueError( + "Invalid nested element type {}, expected numeric type".format( + pyarrow_element_type + )) + return pyarrow_element_type + + +def _unwrap_geometry(a, element_dtype): + if np.isscalar(a) and np.isnan(a): + # replace top-level nana with None + return None + elif isinstance(a, Geometry): + return np.asarray(a.data) + elif sg and isinstance(a, sg.base.BaseGeometry): + return element_dtype._shapely_to_coordinates(a) + else: + return a + + +class _ArrowBufferMixin(object): + @property + def buffer_values(self): + buffers = self.data.buffers() + return np.asarray(buffers[-1]).view(self.numpy_dtype) + + @property + def buffer_offsets(self): + buffers = self.data.buffers() + if len(buffers) < 2: + return (np.array([0]),) + + # Slice first offsets array to match any current extension array slice + # All other buffers remain unchanged + start = self.data.offset + stop = start + len(self.data) + 1 + offsets1 = np.asarray(buffers[1]).view(np.uint32)[start:stop] + + remaining_offsets = tuple( + np.asarray(buffers[i]).view(np.uint32) + for i in range(3, len(buffers) - 1, 2) + ) + + return (offsets1,) + remaining_offsets + + @property + def flat_values(self): + # Compute valid start/stop index into buffer values array. + buffer_offsets = self.buffer_offsets + start = buffer_offsets[0][0] + stop = buffer_offsets[0][-1] + for offsets in buffer_offsets[1:]: + start = offsets[start] + stop = offsets[stop] + + return self.buffer_values[start:stop] + + +class GeometryDtype(ExtensionDtype): + _geometry_name = 'geometry' + base = np.dtype('O') + _metadata = ('_dtype',) + na_value = np.nan + + def __init__(self, subtype): + if isinstance(subtype, GeometryDtype): + self.subtype = subtype.subtype + else: + self.subtype = np.dtype(subtype) + + # Validate the subtype is numeric + if self.subtype.kind not in ('i', 'u', 'f'): + raise ValueError("Received non-numeric type of kind '{}'".format(self.kind)) + + # build nested arrow type + nesting_levels = self.construct_array_type()._nesting_levels + arrow_dtype = pa.from_numpy_dtype(self.subtype) + for i in range(nesting_levels): + arrow_dtype = pa.list_(arrow_dtype) + + self.arrow_dtype = arrow_dtype + + def __hash__(self): + return hash((self.__class__, self.arrow_dtype)) + + def __str__(self): + return "{}[{}]".format(self._geometry_name, str(self.subtype.name)) + + def __repr__(self): + return "{}({})".format(self.__class__.__name__, str(self.subtype.name)) + + @classmethod + def _parse_subtype(cls, dtype_string): + """ + Parse a datatype string to get the subtype + + Parameters + ---------- + dtype_string: str + A string like line[subtype] + + Returns + ------- + subtype: str + + Raises + ------ + ValueError + When the subtype cannot be extracted + """ + # Be case insensitive + dtype_string = dtype_string.lower() + subtype_re = re.compile('^' + cls._geometry_name + r"\[(?P\w+)\]$") + + match = subtype_re.match(dtype_string) + if match: + subtype_string = match.groupdict()['subtype'] + elif dtype_string == cls._geometry_name.lower(): + subtype_string = 'float64' + else: + raise ValueError("Cannot parse {dtype_string}".format( + dtype_string=dtype_string)) + + return subtype_string + + @classmethod + def construct_array_type(cls, *args): + return GeometryArray + + @classmethod + def construct_from_string(cls, string): + # lowercase string + string = string.lower() + + msg = "Cannot construct a '%s' from '{}'" % cls.__name__ + if string.startswith(cls._geometry_name.lower()): + # Extract subtype + try: + subtype_string = cls._parse_subtype(string) + return cls(subtype_string) + except Exception: + raise TypeError(msg.format(string)) + else: + raise TypeError(msg.format(string)) + + def __eq__(self, other): + """Check whether 'other' is equal to self. + By default, 'other' is considered equal if + * it's a string matching 'self.name', or + * it's an instance of this type. + Parameters + ---------- + other : Any + Returns + ------- + bool + """ + if isinstance(other, type(self)): + return self.subtype == other.subtype + elif isinstance(other, str): + return str(self) == other + else: + return False + + @property + def type(self): + # type: () -> type + """The scalar type for the array, e.g. ``int``. + It's expected ``ExtensionArray[item]`` returns an instance + of ``ExtensionDtype.type`` for scalar ``item``. + """ + return Geometry + + @property + def name(self): + # type: () -> str + """A string identifying the data type. + Will be used for display in, e.g. ``Series.dtype`` + """ + return str(self) + + +@total_ordering +class Geometry(_ArrowBufferMixin): + _nesting_levels = 0 + + def __init__(self, data): + if isinstance(data, pa.ListArray): + # Use arrow ListArray as is + self.data = data + else: + self.data = pa.array(data) + if len(self.data) > 0: + _validate_nested_arrow_type(self._nesting_levels, self.data.type) + + def __repr__(self): + return "{}({})".format(self.__class__.__name__, self.data.to_pylist()) + + def __hash__(self): + return hash((self.__class__, np.asarray(self.data).tobytes())) + + def __eq__(self, other): + if type(other) is not type(self): + return False + return np.array_equal(np.asarray(self.data), np.asarray(other.data)) + + def __lt__(self, other): + if type(other) is not type(self): + return NotImplemented + return _lexographic_lt(np.asarray(self.data), np.asarray(other.data)) + + @classmethod + def _shapely_to_coordinates(cls, shape): + raise NotImplementedError() + + @classmethod + def from_shapely(cls, shape): + """ + Build a spatialpandas geometry object from a shapely shape + + Args: + shape: A shapely shape + + Returns: + spatialpandas geometry object with type of the calling class + """ + shape_parts = cls._shapely_to_coordinates(shape) + return cls(shape_parts) + + @property + def numpy_dtype(self): + return np.dtype(self.data.type.to_pandas_dtype()) + + +class Geometry0(Geometry): + _nesting_levels = 0 + + @property + def numpy_dtype(self): + return np.dtype(self.data.type.to_pandas_dtype()) + + @property + def _values(self): + return np.asarray(self.data) + + @property + def _value_offsets(self): + return np.array([0, len(self.data)]) + + +class Geometry1(Geometry): + _nesting_levels = 1 + + @property + def numpy_dtype(self): + if isinstance(self.data, pa.NullArray): + return None + else: + return np.dtype(self.data.type.value_type.to_pandas_dtype()) + + @property + def _value_offsets(self): + buffers = self.data.buffers() + if len(buffers) <= 1: + # Treat NullArray as empty double array so numba can handle it + return np.array([0], dtype=np.uint32) + else: + offsets0 = np.asarray(buffers[1]).view(np.uint32) + + start = self.data.offset + stop = start + len(self.data) + 1 + return offsets0[start:stop] + + @property + def _values(self): + if isinstance(self.data, pa.NullArray): + # Treat NullArray as empty double array so numba can handle it + return np.array([], dtype=np.float64) + else: + return self.data.flatten().to_numpy() + + +class Geometry2(Geometry): + _nesting_levels = 2 + + @property + def numpy_dtype(self): + if isinstance(self.data, pa.NullArray): + return None + else: + return np.dtype(self.data.type.value_type.value_type.to_pandas_dtype()) + + @property + def _value_offsets(self): + buffers = self.data.buffers() + if len(buffers) <= 1: + # Treat NullArray as empty double array so numba can handle it + return np.array([0], dtype=np.uint32) + else: + offsets0 = np.asarray(buffers[1]).view(np.uint32) + offsets1 = np.asarray(buffers[3]).view(np.uint32) + + start = offsets0[self.data.offset] + stop = offsets0[self.data.offset + len(self.data)] + return offsets1[start:stop + 1] + + @property + def _values(self): + if isinstance(self.data, pa.NullArray): + # Treat NullArray as empty double array so numba can handle it + return np.array([], dtype=np.float64) + else: + return self.data.flatten().flatten().to_numpy() + + +class GeometryArray(ExtensionArray, _ArrowBufferMixin): + _can_hold_na = True + _element_type = Geometry + _nesting_levels = 1 + + # Import / export methods + @classmethod + def from_geopandas(cls, ga): + """ + Build a spatialpandas geometry array from a geopandas GeometryArray or + GeoSeries. + + Args: + ga: A geopandas GeometryArray or GeoSeries to import + + Returns: + spatialpandas geometry array with type of the calling class + """ + if cls is GeometryArray: + raise ValueError( + "from_geopandas must be called on a subclass of GeometryArray" + ) + return cls([cls._element_type._shapely_to_coordinates(shape) for shape in ga]) + + def to_geopandas(self): + """ + Convert a spatialpandas geometry array into a geopandas GeometryArray + + Returns: + geopandas GeometryArray + """ + from geopandas.array import from_shapely + return from_shapely([el.to_shapely() for el in self]) + + def __arrow_array__(self, type=None): + return self.data + + @classmethod + def __from_arrow__(cls, data): + return cls(data) + + # Constructor + def __init__(self, array, dtype=None, copy=None): + # Choose default dtype for empty arrays + try: + if len(array) == 0 and dtype is None: + dtype = 'float64' + except: + # len failed + pass + + if isinstance(dtype, GeometryDtype): + # Use arrow type as-is + arrow_dtype = dtype.arrow_dtype + elif dtype is not None and dtype != np.dtype('object'): + # Scalar element dtype + arrow_dtype = pa.from_numpy_dtype(dtype) + + # Wrap dtype with appropriate number of nesting levels + for i in range(self._nesting_levels): + arrow_dtype = pa.list_(arrow_dtype) + else: + # Let arrow infer type + arrow_dtype = None + + # Unwrap Geometry elements to numpy arrays + if is_array_like(array) or isinstance(array, list): + array = [_unwrap_geometry(el, self._element_type) for el in array] + self.data = pa.array(array, type=arrow_dtype) + elif isinstance(array, pa.Array): + self.data = array + elif isinstance(array, pa.ChunkedArray): + self.data = pa.concat_arrays(array) + else: + raise ValueError( + "Unsupported type passed for {}: {}".format( + self.__class__.__name__, type(array) + ) + ) + + self.offsets = np.array([0]) + + # Check that inferred type has the right number of nested levels + pyarrow_element_type = _validate_nested_arrow_type( + self._nesting_levels, self.data.type + ) + + self._pyarrow_element_type = pyarrow_element_type + self._numpy_element_type = pyarrow_element_type.to_pandas_dtype() + self._dtype = self._dtype_class(self._numpy_element_type) + + # Validate input data is compatible + offsets = self.buffer_offsets + + # Validate even number of inner elements per polygon + if any((offsets[-1] % 2) > 0): + raise ValueError(""" +Geometry objects are represented by interleaved x and y coordinates, so they must have +an even number of elements. Received specification with an odd number of elements.""") + + @property + def _dtype_class(self): + return GeometryDtype + + @property + def dtype(self): + return self._dtype + + @property + def nbytes(self): + size = 0 + for buf in self.data.buffers(): + if buf is not None: + size += buf.size + return size + + nbytes.__doc__ = ExtensionArray.nbytes.__doc__ + + def astype(self, dtype, copy=True): + if self.dtype == dtype: + return self.copy() if copy else self + + if dtype == np.dtype('object'): + return np.array(self, dtype='object') + + if isinstance(dtype, GeometryDtype): + dtype = dtype.arrow_dtype.to_pandas_dtype() + elif isinstance(dtype, pa.DataType): + dtype = dtype.to_pandas_dtype() + else: + dtype = np.dtype(dtype) + + return GeometryArray(np.asarray(self).astype(dtype), dtype=dtype) + + astype.__doc__ = ExtensionArray.astype.__doc__ + + def isna(self): + return extract_isnull_bytemap(self.data) + + isna.__doc__ = ExtensionArray.isna.__doc__ + + def copy(self): + return type(self)(self.data) + + copy.__doc__ = ExtensionArray.copy.__doc__ + + def __len__(self): + return len(self.data) + + def __getitem__(self, item): + err_msg = ("Only integers, slices and integer or boolean" + "arrays are valid indices.") + if isinstance(item, Integral): + item = int(item) + if item < -len(self) or item >= len(self): + raise IndexError("{item} is out of bounds".format(item=item)) + else: + # Convert negative item index + if item < 0: + item += len(self) + + value = self.data[item].as_py() + if value is not None: + return self._element_type(value) + else: + return None + elif isinstance(item, slice): + data = [] + selected_indices = np.arange(len(self))[item] + + for selected_index in selected_indices: + data.append(self[selected_index]) + + return self.__class__(data, dtype=self.dtype) + elif isinstance(item, Iterable): + item = np.asarray(item) + if item.dtype == 'bool': + data = [] + + for i, m in enumerate(item): + if m: + data.append(self[i]) + + return self.__class__(data, dtype=self.dtype) + + elif item.dtype.kind in ('i', 'u'): + return self.take(item, allow_fill=False) + else: + raise IndexError(err_msg) + else: + raise IndexError(err_msg) + + @property + def numpy_dtype(self): + return self._numpy_element_type().dtype + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=None): + if isinstance(scalars, GeometryArray): + return scalars + + return cls(scalars, dtype=dtype) + + def take(self, indices, allow_fill=False, fill_value=None): + from pandas.core.algorithms import take + + data = self.astype(object) + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + # fill value should always be translated from the scalar + # type for the array, to the physical storage type for + # the data, before passing to take. + result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) + return self._from_sequence(result, dtype=self.dtype.subtype) + + take.__doc__ = ExtensionArray.take.__doc__ + + def _values_for_factorize(self): + return np.array(self, dtype='object'), None + + @classmethod + def _from_factorized(cls, values, original): + return cls(values, dtype=original.dtype) + + def _values_for_argsort(self): + return np.array(list(self), dtype='object') + + @classmethod + def _concat_same_type(cls, to_concat): + return cls( + pa.concat_arrays( + [ea.data for ea in to_concat] + ), + dtype=to_concat[0].dtype + ) + + def fillna(self, value=None, method=None, limit=None): + from pandas.api.types import is_array_like + from pandas.util._validators import validate_fillna_kwargs + from pandas.core.missing import pad_1d, backfill_1d + + value, method = validate_fillna_kwargs(value, method) + + mask = self.isna() + + if is_array_like(value): + if len(value) != len(self): + raise ValueError( + "Length of 'value' does not match. Got ({}) " + " expected {}".format(len(value), len(self)) + ) + value = value[mask] + + if mask.any(): + if method is not None: + func = pad_1d if method == "pad" else backfill_1d + new_values = func(self.astype(object), limit=limit, mask=mask) + new_values = self._from_sequence(new_values, self._dtype) + else: + # fill with value + new_values = np.asarray(self) + if isinstance(value, Geometry): + value = [value] + new_values[mask] = value + new_values = self.__class__(new_values) + else: + new_values = self.copy() + return new_values + + fillna.__doc__ = ExtensionArray.fillna.__doc__ + + # Base geometry methods + @property + def bounds(self): + return bounds_interleaved(self.flat_values) + + @property + def bounds_x(self): + return bounds_interleaved_1d(self.flat_values, 0) + + @property + def bounds_y(self): + return bounds_interleaved_1d(self.flat_values, 1) diff --git a/spatialpandas/geometry/line.py b/spatialpandas/geometry/line.py new file mode 100644 index 0000000..2d839d1 --- /dev/null +++ b/spatialpandas/geometry/line.py @@ -0,0 +1,122 @@ +from pandas.core.dtypes.dtypes import register_extension_dtype + +from spatialpandas.geometry._algorithms import compute_line_length, \ + geometry_map_nested1 +from spatialpandas.geometry.base import ( + GeometryArray, GeometryDtype, Geometry0 +) +import numpy as np +from dask.dataframe.extensions import make_array_nonempty + + +@register_extension_dtype +class LineDtype(GeometryDtype): + _geometry_name = 'line' + + @classmethod + def construct_array_type(cls, *args): + if len(args) > 0: + raise NotImplementedError("construct_array_type does not support arguments") + return LineArray + + +class Line(Geometry0): + @classmethod + def _shapely_to_coordinates(cls, shape): + import shapely.geometry as sg + if isinstance(shape, (sg.LineString, sg.LinearRing)): + # Single line + return np.asarray(shape.ctypes) + else: + raise ValueError(""" +Received invalid value of type {typ}. Must be an instance of LineString +or LinearRing""".format(typ=type(shape).__name__)) + + def to_shapely(self): + """ + Convert to shapely shape + + Returns: + shapely LineString shape + """ + import shapely.geometry as sg + line_coords = self.data.to_numpy() + return sg.LineString(line_coords.reshape(len(line_coords) // 2, 2)) + + @classmethod + def from_shapely(cls, shape): + """ + Build a spatialpandas Line object from a shapely shape + + Args: + shape: A shapely LineString or LinearRing shape + + Returns: + spatialpandas Line + """ + return super(Line, cls).from_shapely(shape) + + @property + def length(self): + return compute_line_length(self._values, self._value_offsets) + + @property + def area(self): + return 0.0 + + +class LineArray(GeometryArray): + _element_type = Line + _nesting_levels = 1 + + @property + def _dtype_class(self): + return LineDtype + + @classmethod + def from_geopandas(cls, ga): + """ + Build a spatialpandas LineArray from a geopandas GeometryArray or + GeoSeries. + + Args: + ga: A geopandas GeometryArray or GeoSeries of `LineString` or + `LinearRing`shapes. + + Returns: + LineArray + """ + return super(LineArray, cls).from_geopandas(ga) + + @property + def length(self): + result = np.full(len(self), np.nan, dtype=np.float64) + for c, result_offset in enumerate(self.offsets): + geometry_map_nested1( + compute_line_length, + result, + result_offset, + self.buffer_values, + self.buffer_offsets, + self.isna(), + ) + return result + + @property + def area(self): + return np.zeros(len(self), dtype=np.float64) + + +def _line_array_non_empty(dtype): + """ + Create an example length 2 array to register with Dask. + See https://docs.dask.org/en/latest/dataframe-extend.html#extension-arrays + """ + return LineArray([ + [1, 0, 1, 1], + [1, 2, 0, 0] + ], dtype=dtype) + + +if make_array_nonempty: + make_array_nonempty.register(LineDtype)(_line_array_non_empty) diff --git a/spatialpandas/geometry/multiline.py b/spatialpandas/geometry/multiline.py new file mode 100644 index 0000000..d445a3e --- /dev/null +++ b/spatialpandas/geometry/multiline.py @@ -0,0 +1,129 @@ +from pandas.core.dtypes.dtypes import register_extension_dtype + +from spatialpandas.geometry.base import ( + GeometryArray, GeometryDtype, Geometry1 +) +import numpy as np +from spatialpandas.geometry._algorithms import ( + compute_line_length, geometry_map_nested2 +) +from dask.dataframe.extensions import make_array_nonempty + + +@register_extension_dtype +class MultiLineDtype(GeometryDtype): + _geometry_name = 'multiline' + @classmethod + def construct_array_type(cls, *args): + if len(args) > 0: + raise NotImplementedError("construct_array_type does not support arguments") + return MultiLineArray + + +class MultiLine(Geometry1): + @classmethod + def _shapely_to_coordinates(cls, shape): + import shapely.geometry as sg + if isinstance(shape, sg.MultiLineString): + shape = list(shape) + line_parts = [] + for line in shape: + line_parts.append(np.asarray(line.ctypes)) + return line_parts + elif isinstance(shape, (sg.LineString, sg.LinearRing)): + return [np.asarray(shape.ctypes)] + else: + raise ValueError(""" +Received invalid value of type {typ}. Must be an instance of MultiLineString +""".format(typ=type(shape).__name__)) + + def to_shapely(self): + """ + Convert to shapely shape + + Returns: + shapely MultiLineString shape + """ + import shapely.geometry as sg + line_arrays = [line_coords.reshape(len(line_coords) // 2, 2) + for line_coords in np.asarray(self.data)] + lines = [sg.LineString(line_array) for line_array in line_arrays] + return sg.MultiLineString(lines=lines) + + @classmethod + def from_shapely(cls, shape): + """ + Build a spatialpandas MultiLine object from a shapely shape + + Args: + shape: A shapely MultiLineString, LineString, or LinearRing shape + + Returns: + spatialpandas MultiLine + """ + return super(MultiLine, cls).from_shapely(shape) + + @property + def length(self): + return compute_line_length(self._values, self._value_offsets) + + @property + def area(self): + return 0.0 + + +class MultiLineArray(GeometryArray): + _element_type = MultiLine + _nesting_levels = 2 + + @property + def _dtype_class(self): + return MultiLineDtype + + @classmethod + def from_geopandas(cls, ga): + """ + Build a spatialpandas MultiLineArray from a geopandas GeometryArray or + GeoSeries. + + Args: + ga: A geopandas GeometryArray or GeoSeries of MultiLineString, + LineString, or LinearRing shapes. + + Returns: + MultiLineArray + """ + return super(MultiLineArray, cls).from_geopandas(ga) + + @property + def length(self): + result = np.full(len(self), np.nan, dtype=np.float64) + for c, result_offset in enumerate(self.offsets): + geometry_map_nested2( + compute_line_length, + result, + result_offset, + self.buffer_values, + self.buffer_offsets, + self.isna(), + ) + return result + + @property + def area(self): + return np.zeros(len(self), dtype=np.float64) + + +def _multi_line_array_non_empty(dtype): + """ + Create an example length 2 array to register with Dask. + See https://docs.dask.org/en/latest/dataframe-extend.html#extension-arrays + """ + return MultiLineArray([ + [[1, 0, 1, 1], [1, 2, 0, 0]], + [[3, 3, 4, 4]] + ], dtype=dtype) + + +if make_array_nonempty: + make_array_nonempty.register(MultiLineDtype)(_multi_line_array_non_empty) diff --git a/spatialpandas/geometry/multipoint.py b/spatialpandas/geometry/multipoint.py new file mode 100644 index 0000000..a330dd4 --- /dev/null +++ b/spatialpandas/geometry/multipoint.py @@ -0,0 +1,110 @@ +import numpy as np +from pandas.core.dtypes.dtypes import register_extension_dtype + +from spatialpandas.geometry.base import ( + GeometryArray, GeometryDtype, Geometry0 +) +from dask.dataframe.extensions import make_array_nonempty + + +@register_extension_dtype +class MultiPointDtype(GeometryDtype): + _geometry_name = 'multipoint' + @classmethod + def construct_array_type(cls, *args): + if len(args) > 0: + raise NotImplementedError("construct_array_type does not support arguments") + return MultiPointArray + + +class MultiPoint(Geometry0): + + @classmethod + def _shapely_to_coordinates(cls, shape): + import shapely.geometry as sg + if isinstance(shape, (sg.Point, sg.MultiPoint)): + # Single line + return np.asarray(shape.ctypes) + else: + raise ValueError(""" +Received invalid value of type {typ}. Must be an instance of Point, +or MultiPoint""".format(typ=type(shape).__name__)) + + def to_shapely(self): + """ + Convert to shapely shape + + Returns: + shapely MultiPoint shape + """ + import shapely.geometry as sg + line_coords = self.data.to_numpy() + return sg.MultiPoint(line_coords.reshape(len(line_coords) // 2, 2)) + + @classmethod + def from_shapely(cls, shape): + """ + Build a spatialpandas MultiPoint object from a shapely shape + + Args: + shape: A shapely MultiPoint or Point shape + + Returns: + spatialpandas MultiPoint + """ + return super(MultiPoint, cls).from_shapely(shape) + + @property + def length(self): + return 0.0 + + @property + def area(self): + return 0.0 + + +class MultiPointArray(GeometryArray): + _element_type = MultiPoint + _nesting_levels = 1 + + @property + def _dtype_class(self): + return MultiPointDtype + + @classmethod + def from_geopandas(cls, ga): + """ + Build a spatialpandas MultiPointArray from a geopandas GeometryArray or + GeoSeries. + + Args: + ga: A geopandas GeometryArray or GeoSeries of MultiPoint or + Point shapes. + + Returns: + MultiPointArray + """ + return super(MultiPointArray, cls).from_geopandas(ga) + + @property + def length(self): + return np.zeros(len(self), dtype=np.float64) + + @property + def area(self): + return np.zeros(len(self), dtype=np.float64) + + +def _multi_points_array_non_empty(dtype): + """ + Create an example length 2 array to register with Dask. + See https://docs.dask.org/en/latest/dataframe-extend.html#extension-arrays + """ + return MultiPointArray([ + [1, 0, 1, 1], + [1, 2, 0, 0] + ], dtype=dtype) + + +if make_array_nonempty: + make_array_nonempty.register(MultiPointDtype)(_multi_points_array_non_empty) diff --git a/spatialpandas/geometry/multipolygon.py b/spatialpandas/geometry/multipolygon.py new file mode 100644 index 0000000..9a09e62 --- /dev/null +++ b/spatialpandas/geometry/multipolygon.py @@ -0,0 +1,188 @@ +from __future__ import absolute_import +from pandas.core.dtypes.dtypes import register_extension_dtype + +from spatialpandas.geometry import Polygon +from spatialpandas.geometry.base import ( + GeometryArray, GeometryDtype, Geometry2 +) +from spatialpandas.geometry.multiline import MultiLineArray, MultiLine +import numpy as np +from spatialpandas.geometry._algorithms import ( + compute_line_length, compute_area, geometry_map_nested3 +) +from dask.dataframe.extensions import make_array_nonempty +import pyarrow as pa + + +@register_extension_dtype +class MultiPolygonDtype(GeometryDtype): + _geometry_name = 'multipolygon' + + @classmethod + def construct_array_type(cls, *args): + if len(args) > 0: + raise NotImplementedError("construct_array_type does not support arguments") + return MultiPolygonArray + + +class MultiPolygon(Geometry2): + + @classmethod + def _shapely_to_coordinates(cls, shape, orient=True): + import shapely.geometry as sg + if isinstance(shape, sg.MultiPolygon): + multipolygon = [] + for polygon in shape: + polygon_coords = Polygon._shapely_to_coordinates(polygon, orient) + multipolygon.append(polygon_coords) + + return multipolygon + elif isinstance(shape, sg.Polygon): + return [Polygon._shapely_to_coordinates(shape, orient)] + else: + raise ValueError(""" +Received invalid value of type {typ}. Must be an instance of Polygon or MultiPolygon +""".format(typ=type(shape).__name__)) + + def to_shapely(self): + """ + Convert to shapely shape + + Returns: + shapely MultiPolygon shape + """ + import shapely.geometry as sg + polygon_arrays = np.asarray(self.data) + + polygons = [] + for polygon_array in polygon_arrays: + ring_arrays = [line_coords.reshape(len(line_coords) // 2, 2) + for line_coords in polygon_array] + + rings = [sg.LinearRing(ring_array) for ring_array in ring_arrays] + polygons.append(sg.Polygon(shell=rings[0], holes=rings[1:])) + + return sg.MultiPolygon(polygons=polygons) + + @classmethod + def from_shapely(cls, shape, orient=True): + """ + Build a spatialpandas MultiPolygon object from a shapely shape + + Args: + shape: A shapely Polygon or MultiPolygon shape + orient: If True (default), reorder polygon vertices so that outer shells + are stored in counter clockwise order and holes are stored in + clockwise order. If False, accept vertices as given. Note that + while there is a performance cost associated with this operation + some algorithms will not behave properly if the above ordering + convention is not followed, so only set orient=False if it is + known that this convention is followed in the input data. + Returns: + spatialpandas MultiPolygon + """ + shape_parts = cls._shapely_to_coordinates(shape, orient) + return cls(shape_parts) + + + @property + def boundary(self): + new_offsets = self.buffer_offsets[1] + new_data = pa.ListArray.from_arrays(new_offsets, self.buffer_values) + return MultiLine(new_data) + + @property + def length(self): + return compute_line_length(self._values, self._value_offsets) + + @property + def area(self): + return compute_area(self._values, self._value_offsets) + + +class MultiPolygonArray(GeometryArray): + _element_type = MultiPolygon + _nesting_levels = 3 + + @property + def _dtype_class(self): + return MultiPolygonDtype + + @classmethod + def from_geopandas(cls, ga, orient=True): + """ + Build a spatialpandas MultiPolygonArray from a geopandas GeometryArray or + GeoSeries. + + Args: + ga: A geopandas GeometryArray or GeoSeries of MultiPolygon or + Polygon shapes. + orient: If True (default), reorder polygon vertices so that outer shells + are stored in counter clockwise order and holes are stored in + clockwise order. If False, accept vertices as given. Note that + while there is a performance cost associated with this operation + some algorithms will not behave properly if the above ordering + convention is not followed, so only set orient=False if it is + known that this convention is followed in the input data. + + Returns: + MultiPolygonArray + """ + return cls([MultiPolygon._shapely_to_coordinates(shape, orient) for shape in ga]) + + @property + def boundary(self): + offsets = self.buffer_offsets + inner_data = pa.ListArray.from_arrays(offsets[2], self.buffer_values) + new_data = pa.ListArray.from_arrays(offsets[1][offsets[0]], inner_data) + return MultiLineArray(new_data) + + @property + def length(self): + result = np.full(len(self), np.nan, dtype=np.float64) + for c, result_offset in enumerate(self.offsets): + geometry_map_nested3( + compute_line_length, + result, + result_offset, + self.buffer_values, + self.buffer_offsets, + self.isna(), + ) + return result + + @property + def area(self): + result = np.full(len(self), np.nan, dtype=np.float64) + for c, result_offset in enumerate(self.offsets): + geometry_map_nested3( + compute_area, + result, + result_offset, + self.buffer_values, + self.buffer_offsets, + self.isna(), + ) + return result + + +def _multi_polygon_array_non_empty(dtype): + """ + Create an example length 2 array to register with Dask. + See https://docs.dask.org/en/latest/dataframe-extend.html#extension-arrays + """ + return MultiPolygonArray([ + [ + [[1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0], + [1.1, 1.1, 1.5, 1.9, 1.9, 1.1, 1.1, 1.1]] + ], + [ + [[0.0, 0.0, 1.0, 0.0, 2.0, 1.0, 0.5, 3.0, -1.0, 1.0, 0.0, 0.0], + [0.2, 0.2, 0.5, 1.0, 0.8, 0.2, 0.2, 0.2], + [0.5, 1.25, 0.3, 2.0, 0.8, 2.0, 0.5, 1.25]] + ] + ], dtype=dtype) + + +if make_array_nonempty: + make_array_nonempty.register(MultiPolygonDtype)(_multi_polygon_array_non_empty) diff --git a/spatialpandas/geometry/polygon.py b/spatialpandas/geometry/polygon.py new file mode 100644 index 0000000..8c198e8 --- /dev/null +++ b/spatialpandas/geometry/polygon.py @@ -0,0 +1,169 @@ +from __future__ import absolute_import +from pandas.core.dtypes.dtypes import register_extension_dtype + +from spatialpandas.geometry.base import ( + GeometryArray, GeometryDtype, Geometry1 +) +from spatialpandas.geometry.multiline import MultiLineArray, MultiLine +import numpy as np +from spatialpandas.geometry._algorithms import ( + compute_line_length, compute_area, geometry_map_nested2 +) +from dask.dataframe.extensions import make_array_nonempty + + +@register_extension_dtype +class PolygonDtype(GeometryDtype): + _geometry_name = 'polygon' + + @classmethod + def construct_array_type(cls, *args): + if len(args) > 0: + raise NotImplementedError("construct_array_type does not support arguments") + return PolygonArray + + +class Polygon(Geometry1): + @classmethod + def _shapely_to_coordinates(cls, shape, orient=True): + import shapely.geometry as sg + if isinstance(shape, sg.Polygon): + if orient: + shape = sg.polygon.orient(shape) + exterior = np.asarray(shape.exterior.ctypes) + polygon_coords = [exterior] + for ring in shape.interiors: + interior = np.asarray(ring.ctypes) + polygon_coords.append(interior) + + return polygon_coords + else: + raise ValueError(""" +Received invalid value of type {typ}. Must be an instance of Polygon +""".format(typ=type(shape).__name__)) + + def to_shapely(self): + """ + Convert to shapely shape + + Returns: + shapely Polygon shape + """ + import shapely.geometry as sg + ring_arrays = [line_coords.reshape(len(line_coords) // 2, 2) + for line_coords in np.asarray(self.data)] + rings = [sg.LinearRing(ring_array) for ring_array in ring_arrays] + return sg.Polygon(shell=rings[0], holes=rings[1:]) + + @classmethod + def from_shapely(cls, shape, orient=True): + """ + Build a spatialpandas Polygon object from a shapely shape + + Args: + shape: A shapely Polygon shape + orient: If True (default), reorder polygon vertices so that outer shells + are stored in counter clockwise order and holes are stored in + clockwise order. If False, accept vertices as given. Note that + while there is a performance cost associated with this operation + some algorithms will not behave properly if the above ordering + convention is not followed, so only set orient=False if it is + known that this convention is followed in the input data. + Returns: + spatialpandas Polygon + """ + shape_parts = cls._shapely_to_coordinates(shape, orient) + return cls(shape_parts) + + @property + def boundary(self): + # The representation of PolygonArray and MultiLineArray is identical + return MultiLine(self.data) + + @property + def length(self): + return compute_line_length(self._values, self._value_offsets) + + @property + def area(self): + return compute_area(self._values, self._value_offsets) + + +class PolygonArray(GeometryArray): + _element_type = Polygon + _nesting_levels = 2 + + @property + def _dtype_class(self): + return PolygonDtype + + @classmethod + def from_geopandas(cls, ga, orient=True): + """ + Build a spatialpandas PolygonArray from a geopandas GeometryArray or + GeoSeries. + + Args: + ga: A geopandas GeometryArray or GeoSeries of Polygon shapes. + orient: If True (default), reorder polygon vertices so that outer shells + are stored in counter clockwise order and holes are stored in + clockwise order. If False, accept vertices as given. Note that + while there is a performance cost associated with this operation + some algorithms will not behave properly if the above ordering + convention is not followed, so only set orient=False if it is + known that this convention is followed in the input data. + Returns: + PolygonArray + """ + return cls([Polygon._shapely_to_coordinates(shape, orient) for shape in ga]) + + @property + def boundary(self): + # The representation of PolygonArray and MultiLineArray is identical + return MultiLineArray(self.data) + + @property + def length(self): + result = np.full(len(self), np.nan, dtype=np.float64) + for c, result_offset in enumerate(self.offsets): + geometry_map_nested2( + compute_line_length, + result, + result_offset, + self.buffer_values, + self.buffer_offsets, + self.isna(), + ) + return result + + @property + def area(self): + result = np.full(len(self), np.nan, dtype=np.float64) + for c, result_offset in enumerate(self.offsets): + geometry_map_nested2( + compute_area, + result, + result_offset, + self.buffer_values, + self.buffer_offsets, + self.isna(), + ) + return result + + +def _polygon_array_non_empty(dtype): + """ + Create an example length 2 array to register with Dask. + See https://docs.dask.org/en/latest/dataframe-extend.html#extension-arrays + """ + return PolygonArray( + [ + [[1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0], + [1.1, 1.1, 1.5, 1.9, 1.9, 1.1, 1.1, 1.1]], + [[1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0, 1.0]] + ], dtype=dtype + ) + + +if make_array_nonempty: + make_array_nonempty.register(PolygonDtype)(_polygon_array_non_empty) diff --git a/spatialpandas/geometry/ring.py b/spatialpandas/geometry/ring.py new file mode 100644 index 0000000..f36955d --- /dev/null +++ b/spatialpandas/geometry/ring.py @@ -0,0 +1,78 @@ +from pandas.core.dtypes.dtypes import register_extension_dtype + +from spatialpandas.geometry.line import ( + LineDtype, Line, LineArray +) +from dask.dataframe.extensions import make_array_nonempty + + +@register_extension_dtype +class RingDtype(LineDtype): + _geometry_name = 'ring' + + @classmethod + def construct_array_type(cls, *args): + return RingArray + + +class Ring(Line): + def to_shapely(self): + """ + Convert to shapely shape + + Returns: + shapely LinearRing shape + """ + import shapely.geometry as sg + line_coords = self.data.to_numpy() + return sg.LinearRing(line_coords.reshape(len(line_coords) // 2, 2)) + + @classmethod + def from_shapely(cls, shape): + """ + Build a spatialpandas Ring object from a shapely shape + + Args: + shape: A shapely LinearRing shape + + Returns: + spatialpandas Ring + """ + return super(Ring, cls).from_shapely(shape) + + +class RingArray(LineArray): + _element_type = Line + + @property + def _dtype_class(self): + return RingDtype + + @classmethod + def from_geopandas(cls, ga): + """ + Build a spatialpandas RingArray from a geopandas GeometryArray or + GeoSeries. + + Args: + ga: A geopandas GeometryArray or GeoSeries of LinearRing shapes. + + Returns: + RingArray + """ + return super(RingArray, cls).from_geopandas(ga) + + +def _ring_array_non_empty(dtype): + """ + Create an example length 2 array to register with Dask. + See https://docs.dask.org/en/latest/dataframe-extend.html#extension-arrays + """ + return RingArray([ + [0, 0, 1, 0, 1, 1, 0, 0], + [2, 2, 2, 3, 3, 3, 2, 2] + ], dtype=dtype) + + +if make_array_nonempty: + make_array_nonempty.register(RingDtype)(_ring_array_non_empty) diff --git a/spatialpandas/spatialindex/hilbert_curve.py b/spatialpandas/spatialindex/hilbert_curve.py index eb9982e..c82ffc7 100644 --- a/spatialpandas/spatialindex/hilbert_curve.py +++ b/spatialpandas/spatialindex/hilbert_curve.py @@ -3,7 +3,7 @@ import numpy as np import os -from spatialpandas.geometry.base import ngjit +from spatialpandas.utils import ngjit """ Initially based on https://github.com/galtay/hilbert_curve, but specialized diff --git a/spatialpandas/spatialindex/rtree.py b/spatialpandas/spatialindex/rtree.py index 7a62472..b686b51 100644 --- a/spatialpandas/spatialindex/rtree.py +++ b/spatialpandas/spatialindex/rtree.py @@ -2,9 +2,10 @@ from numba import jitclass from numba import int64, float64 -from spatialpandas.geometry.base import ngjit -from spatialpandas.spatialindex.hilbert_curve import _data2coord, \ - distance_from_coordinates +from spatialpandas.spatialindex.hilbert_curve import ( + _data2coord, distance_from_coordinates +) +from spatialpandas.utils import ngjit @ngjit diff --git a/spatialpandas/utils.py b/spatialpandas/utils.py new file mode 100644 index 0000000..a73d949 --- /dev/null +++ b/spatialpandas/utils.py @@ -0,0 +1,3 @@ +from numba import jit + +ngjit = jit(nopython=True, nogil=True) diff --git a/tests/geometry/__init__.py b/tests/geometry/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/geometry/test_geometry.py b/tests/geometry/test_geometry.py new file mode 100644 index 0000000..4edb5c3 --- /dev/null +++ b/tests/geometry/test_geometry.py @@ -0,0 +1,85 @@ +import numpy as np +from spatialpandas.geometry import ( + MultiPoint, MultiPointArray, Line, LineArray, + MultiLine, MultiLineArray, Polygon, PolygonArray, + MultiPolygon, MultiPolygonArray +) + +unit_square_cw = np.array([1, 1, 1, 2, 2, 2, 2, 1, 1, 1], dtype='float64') +large_square_ccw = np.array([0, 0, 3, 0, 3, 3, 0, 3, 0, 0], dtype='float64') + + +def test_points(): + points = MultiPoint(unit_square_cw) + assert points.length == 0.0 + assert points.area == 0.0 + + +def test_points_array(): + points = MultiPointArray([ + unit_square_cw, + large_square_ccw, + np.concatenate([large_square_ccw, unit_square_cw]) + ]) + + np.testing.assert_equal(points.length, [0.0, 0.0, 0.0]) + np.testing.assert_equal(points.area, [0.0, 0.0, 0.0]) + assert points.bounds == (0.0, 0.0, 3.0, 3.0) + + +def test_lines(): + lines = Line(unit_square_cw) + assert lines.length == 4.0 + assert lines.area == 0.0 + + +def test_lines_array(): + lines = LineArray([ + unit_square_cw, + large_square_ccw, + np.concatenate([large_square_ccw, [np.nan, np.nan], unit_square_cw]) + ]) + + np.testing.assert_equal(lines.length, [4.0, 12.0, 16.0]) + np.testing.assert_equal(lines.area, [0.0, 0.0, 0.0]) + assert lines.bounds == (0.0, 0.0, 3.0, 3.0) + + +def test_polygon(): + polygon = Polygon([large_square_ccw, unit_square_cw]) + assert polygon.length == 16.0 + assert polygon.area == 8.0 + + +def test_polygon_array(): + polygons = PolygonArray([ + [large_square_ccw], + [large_square_ccw, unit_square_cw], + [unit_square_cw] + ]) + np.testing.assert_equal(polygons.length, [12.0, 16.0, 4.0]) + np.testing.assert_equal(polygons.area, [9.0, 8.0, -1.0]) + assert polygons.bounds == (0.0, 0.0, 3.0, 3.0) + + +def test_multipolygon(): + multipolygon = MultiPolygon([ + [large_square_ccw, unit_square_cw], + [large_square_ccw + 4.0] + ]) + assert multipolygon.length == 28.0 + assert multipolygon.area == 17.0 + + +def test_multipolygon_array(): + multipolygon = MultiPolygonArray([ + [ + [large_square_ccw, unit_square_cw], + [large_square_ccw + 4.0] + ], [ + [large_square_ccw + 8.0] + ] + ]) + np.testing.assert_equal(multipolygon.length, [28.0, 12.0]) + np.testing.assert_equal(multipolygon.area, [17.0, 9.0]) + assert multipolygon.bounds == (0.0, 0.0, 11.0, 11.0) diff --git a/tests/test_extensionarray.py b/tests/test_extensionarray.py new file mode 100644 index 0000000..56afb2c --- /dev/null +++ b/tests/test_extensionarray.py @@ -0,0 +1,180 @@ +import pandas.tests.extension.base as eb +import pytest +from spatialpandas.geometry import LineArray, LineDtype + + +# Pandas-provided extension array tests +# ------------------------------------- +# See http://pandas-docs.github.io/pandas-docs-travis/extending.html +# +# Fixtures +@pytest.fixture +def dtype(): + """A fixture providing the ExtensionDtype to validate.""" + return LineDtype(subtype='float64') + + +@pytest.fixture +def data(): + """Length-100 array for this type. + * data[0] and data[1] should both be non missing + * data[0] and data[1] should not gbe equal + """ + return LineArray( + [[0, 1], [1, 2, 3, 4], None, [-1, -2], []]*20, dtype='float64') + + +@pytest.fixture +def data_repeated(data): + """ + Generate many datasets. + Parameters + ---------- + data : fixture implementing `data` + Returns + ------- + Callable[[int], Generator]: + A callable that takes a `count` argument and + returns a generator yielding `count` datasets. + """ + def gen(count): + for _ in range(count): + yield data + return gen + + +@pytest.fixture +def data_missing(): + """Length-2 array with [NA, Valid]""" + return LineArray([None, [-1, 0, 1, 2]], dtype='int64') + + +@pytest.fixture(params=['data', 'data_missing']) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == 'data': + return data + elif request.param == 'data_missing': + return data_missing + + +@pytest.fixture +def data_for_sorting(): + """Length-3 array with a known sort order. + This should be three items [B, C, A] with + A < B < C + """ + return LineArray([[1, 0], [2, 0], [0, 0]]) + + +@pytest.fixture +def data_missing_for_sorting(): + """Length-3 array with a known sort order. + This should be three items [B, NA, A] with + A < B and NA missing. + """ + return LineArray([[1, 0], None, [0, 0]]) + + +@pytest.fixture +def data_for_grouping(): + """Data for factorization, grouping, and unique tests. + Expected to be like [B, B, NA, NA, A, A, B, C] + Where A < B < C and NA is missing + """ + return LineArray( + [[1, 0], [1, 0], None, None, [0, 0], [0, 0], [1, 0], [2, 0]]) + + +@pytest.fixture +def na_cmp(): + return lambda x, y: x is None and y is None + + +@pytest.fixture +def na_value(): + return None + + +@pytest.fixture +def groupby_apply_op(): + return lambda x: [1] * len(x) + + +@pytest.fixture +def fillna_method(): + return 'ffill' + + +@pytest.fixture(params=[True, False]) +def as_frame(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def as_series(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def use_numpy(request): + return request.param + + +# Subclass BaseDtypeTests to run pandas-provided extension array test suite +class TestGeometryConstructors(eb.BaseConstructorsTests): + pass + + +class TestGeometryDtype(eb.BaseDtypeTests): + pass + + +class TestGeometryGetitem(eb.BaseGetitemTests): + pass + + +class TestGeometryGroupby(eb.BaseGroupbyTests): + pass + + +class TestGeometryInterface(eb.BaseInterfaceTests): + # # NotImplementedError: 'Geometry' does not support __setitem__ + @pytest.mark.skip(reason="__setitem__ not supported") + def test_copy(self): + pass + + +class TestGeometryMethods(eb.BaseMethodsTests): + # # AttributeError: 'RaggedArray' object has no attribute 'value_counts' + @pytest.mark.skip(reason="value_counts not supported") + def test_value_counts(self): + pass + + # Ragged array elements don't support binary operators + @pytest.mark.skip(reason="ragged does not support <= on elements") + def test_combine_le(self): + pass + + @pytest.mark.skip(reason="ragged does not support + on elements") + def test_combine_add(self): + pass + + @pytest.mark.skip(reason="combine_first not supported") + def test_combine_first(self): + pass + + +class TestGeometryPrinting(eb.BasePrintingTests): + pass + + +class TestGeometryMissing(eb.BaseMissingTests): + pass + + +class TestGeometryReshaping(eb.BaseReshapingTests): + @pytest.mark.skip(reason="__setitem__ not supported") + def test_ravel(self): + pass +