Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function for rolling windows on irregular data #236

Merged
merged 10 commits into from
Mar 13, 2020
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Coordinate Manipulation
project_region
inside
block_split
rolling_window

Utilities
---------
Expand Down
1 change: 1 addition & 0 deletions verde/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
grid_coordinates,
inside,
block_split,
rolling_window,
profile_coordinates,
get_region,
pad_region,
Expand Down
236 changes: 230 additions & 6 deletions verde/coordinates.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
Functions for generating and manipulating coordinates.
"""
import warnings

import numpy as np
from sklearn.utils import check_random_state

Expand Down Expand Up @@ -735,6 +737,7 @@ def block_split(coordinates, spacing=None, adjust="spacing", region=None, shape=
See also
--------
BlockReduce : Apply a reduction operation to the data in blocks (windows).
rolling_window : Select points on a rolling (moving) window.

Examples
--------
Expand Down Expand Up @@ -770,15 +773,236 @@ def block_split(coordinates, spacing=None, adjust="spacing", region=None, shape=
"""
if region is None:
region = get_region(coordinates)
block_coords = tuple(
i.ravel()
for i in grid_coordinates(
region, spacing=spacing, shape=shape, adjust=adjust, pixel_register=True
)
block_coords = grid_coordinates(
region, spacing=spacing, shape=shape, adjust=adjust, pixel_register=True
)
tree = kdtree(block_coords)
labels = tree.query(np.transpose(n_1d_arrays(coordinates, 2)))[1]
return block_coords, labels
return n_1d_arrays(block_coords, len(block_coords)), labels


def rolling_window(
coordinates, size, spacing=None, shape=None, region=None, adjust="spacing"
):
"""
Select points on a rolling (moving) window.

A window of the given size is moved across the region at a given step
(specified by *spacing* or *shape*). Returns the indices of points falling
inside each window step. You can use the indices to select points falling
inside a given window.

The size of the step when moving the windows can be specified by the
*spacing* parameter. Alternatively, the number of windows in the
South-North and West-East directions can be specified using the *shape*
parameter. **One of the two must be given.**

Parameters
----------
coordinates : tuple of arrays
Arrays with the coordinates of each data point. Should be in the
following order: (easting, northing, vertical, ...).
size : float
The size of the windows. Units should match the units of *coordinates*.
spacing : float, tuple = (s_north, s_east), or None
The window size in the South-North and West-East directions,
respectively. A single value means that the size is equal in both
directions.
shape : tuple = (n_north, n_east) or None
The number of blocks in the South-North and West-East directions,
respectively.
region : list = [W, E, S, N]
The boundaries of a given region in Cartesian or geographic
coordinates. If not region is given, will use the bounding region of
the given points.
adjust : {'spacing', 'region'}
Whether to adjust the spacing or the region if required. Ignored if
*shape* is given instead of *spacing*. Defaults to adjusting the
spacing.

Returns
-------
window_coordinates : tuple of arrays
Coordinate arrays for the center of each window.
indices : array
Each element of the array corresponds the indices of points falling
inside a window. The array will have the same shape as the
*window_coordinates*. Use the array elements to index the coordinates
for each window. The indices will depend on the number of dimensions in
the input coordinates. For example, if the coordinates are 2D arrays,
each window will contain indices for 2 dimensions (row, column).

See also
--------
block_split : Split a region into blocks and label points accordingly.

Examples
--------

Generate a set of sample coordinates on a grid and determine the indices
of points for each rolling window:

>>> from verde import grid_coordinates
>>> coords = grid_coordinates((-5, -1, 6, 10), spacing=1)
>>> print(coords[0])
[[-5. -4. -3. -2. -1.]
[-5. -4. -3. -2. -1.]
[-5. -4. -3. -2. -1.]
[-5. -4. -3. -2. -1.]
[-5. -4. -3. -2. -1.]]
>>> print(coords[1])
[[ 6. 6. 6. 6. 6.]
[ 7. 7. 7. 7. 7.]
[ 8. 8. 8. 8. 8.]
[ 9. 9. 9. 9. 9.]
[10. 10. 10. 10. 10.]]
>>> # Get the rolling window indices
>>> window_coords, indices = rolling_window(coords, size=2, spacing=2)
>>> # Window coordinates will be 2D arrays. Their shape is the number of
>>> # windows in each dimension
>>> print(window_coords[0].shape, window_coords[1].shape)
(2, 2) (2, 2)
>>> # The there are the easting and northing coordinates for the center of
>>> # each rolling window
>>> for coord in window_coords:
... print(coord)
[[-4. -2.]
[-4. -2.]]
[[7. 7.]
[9. 9.]]
>>> # The indices of points falling on each window will have the same shape
>>> # as the window center coordinates
>>> print(indices.shape)
(2, 2)
>>> # The points in the first window. Indices are 2D positions because the
>>> # coordinate arrays are 2D.
>>> print(len(indices[0, 0]))
2
>>> for dimension in indices[0, 0]:
... print(dimension)
[0 0 0 1 1 1 2 2 2]
[0 1 2 0 1 2 0 1 2]
>>> for dimension in indices[0, 1]:
... print(dimension)
[0 0 0 1 1 1 2 2 2]
[2 3 4 2 3 4 2 3 4]
>>> for dimension in indices[1, 0]:
... print(dimension)
[2 2 2 3 3 3 4 4 4]
[0 1 2 0 1 2 0 1 2]
>>> for dimension in indices[1, 1]:
... print(dimension)
[2 2 2 3 3 3 4 4 4]
[2 3 4 2 3 4 2 3 4]
>>> # To get the coordinates for each window, use indexing
>>> print(coords[0][indices[0, 0]])
[-5. -4. -3. -5. -4. -3. -5. -4. -3.]
>>> print(coords[1][indices[0, 0]])
[6. 6. 6. 7. 7. 7. 8. 8. 8.]

If the coordinates are 1D, the indices will also be 1D:

>>> coords1d = [coord.ravel() for coord in coords]
>>> window_coords, indices = rolling_window(coords1d, size=2, spacing=2)
>>> print(len(indices[0, 0]))
1
>>> print(indices[0, 0][0])
[ 0 1 2 5 6 7 10 11 12]
>>> print(indices[0, 1][0])
[ 2 3 4 7 8 9 12 13 14]
>>> print(indices[1, 0][0])
[10 11 12 15 16 17 20 21 22]
>>> print(indices[1, 1][0])
[12 13 14 17 18 19 22 23 24]
>>> # The returned indices can be used in the same way as before
>>> print(coords1d[0][indices[0, 0]])
[-5. -4. -3. -5. -4. -3. -5. -4. -3.]
>>> print(coords1d[1][indices[0, 0]])
[6. 6. 6. 7. 7. 7. 8. 8. 8.]

By default, the windows will span the entire data region. You can also
control the specific region you'd like the windows to cover:

>>> # Coordinates on a larger region but with the same spacing as before
>>> coords = grid_coordinates((-10, 5, 0, 20), spacing=1)
>>> # Get the rolling window indices but limited to the region from before
>>> window_coords, indices = rolling_window(
... coords, size=2, spacing=2, region=(-5, -1, 6, 10),
... )
>>> # The windows should still be in the same place as before
>>> for coord in window_coords:
... print(coord)
[[-4. -2.]
[-4. -2.]]
[[7. 7.]
[9. 9.]]
>>> # And indexing the coordinates should also provide the same result
>>> print(coords[0][indices[0, 0]])
[-5. -4. -3. -5. -4. -3. -5. -4. -3.]
>>> print(coords[1][indices[0, 0]])
[6. 6. 6. 7. 7. 7. 8. 8. 8.]

"""
shapes = [coord.shape for coord in coordinates]
if not all(shape == shapes[0] for shape in shapes):
raise ValueError(
"Coordinate arrays must have the same shape. Given shapes: {}".format(
shapes
)
)
if region is None:
region = get_region(coordinates)
# Calculate the region spanning the centers of the rolling windows
window_region = [
dimension + (-1) ** (i % 2) * size / 2 for i, dimension in enumerate(region)
]
_check_rolling_window_overlap(window_region, size, shape, spacing)
centers = grid_coordinates(
window_region, spacing=spacing, shape=shape, adjust=adjust
)
# pykdtree doesn't support query_ball_point yet and we need that
tree = kdtree(coordinates, use_pykdtree=False)
# Coordinates must be transposed because the kd-tree wants them as columns
# of a matrix
# Use p=inf (infinity norm) to get square windows instead of circular ones
indices1d = tree.query_ball_point(
np.transpose(n_1d_arrays(centers, 2)), r=size / 2, p=np.inf
leouieda marked this conversation as resolved.
Show resolved Hide resolved
)
# Make the indices array the same shape as the center coordinates array.
# That preserves the information of the number of windows in each
# dimension. Need to first create an empty array of object type because
# otherwise numpy tries to use the index tuples as dimensions (even if
# given ndim=1 explicitly). Can't make it 1D and then reshape because the
# reshape is ignored for some reason. The workaround is to create the array
# with the correct shape and assign the values to a raveled view of the
# array.
indices = np.empty(centers[0].shape, dtype="object")
# Need to convert the indices to int arrays because unravel_index doesn't
# like empty lists but can handle empty integer arrays in case a window has
# no points inside it.
indices.ravel()[:] = [
np.unravel_index(np.array(i, dtype="int"), shape=shapes[0]) for i in indices1d
]
return centers, indices


def _check_rolling_window_overlap(region, size, shape, spacing):
"""
Warn the user if there is no overlap between neighboring windows.
"""
if shape is not None:
ndims = len(shape)
dimensions = [region[i * ndims + 1] - region[i * ndims] for i in range(ndims)]
# The - 1 is because we need to divide by the number of intervals, not
# the number of nodes.
spacing = tuple(dim / (n - 1) for dim, n in zip(dimensions, shape))
spacing = np.atleast_1d(spacing)
if np.any(spacing > size):
warnings.warn(
f"Rolling windows do not overlap (size '{size}' and spacing '{spacing}'). "
"Some data points may not be included in any window. "
"Increase size or decrease spacing to avoid this."
)


def longitude_continuity(coordinates, region):
Expand Down
44 changes: 44 additions & 0 deletions verde/tests/test_coordinates.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
Test the coordinate generation functions
"""
import warnings

import numpy as np
import numpy.testing as npt
import pytest
Expand All @@ -11,9 +13,51 @@
profile_coordinates,
grid_coordinates,
longitude_continuity,
rolling_window,
)


def test_rolling_window_invalid_coordinate_shapes():
"Shapes of input coordinates must all be the same"
coordinates = [np.arange(10), np.arange(10).reshape((5, 2))]
with pytest.raises(ValueError):
rolling_window(coordinates, size=2, spacing=1)


def test_rolling_window_empty():
"Make sure empty windows return an empty index"
coords = grid_coordinates((-5, -1, 6, 10), spacing=1)
# Use a larger region to make sure the first window is empty
# Doing this will raise a warning for non-overlapping windows. Capture it
# so it doesn't pollute the test log.
with warnings.catch_warnings(record=True):
windows = rolling_window(coords, size=0.001, spacing=1, region=(-7, 1, 4, 12))[
1
]
assert windows[0, 0][0].size == 0 and windows[0, 0][1].size == 0
# Make sure we can still index with an empty array
assert coords[0][windows[0, 0]].size == 0


def test_rolling_window_warnings():
"Should warn users if the windows don't overlap"
coords = grid_coordinates((-5, -1, 6, 10), spacing=1)
# For exact same size there will be 1 point overlapping so should not warn
with warnings.catch_warnings(record=True) as warn:
rolling_window(coords, size=2, spacing=2)
assert not any(issubclass(w.category, UserWarning) for w in warn)
args = [dict(spacing=3), dict(spacing=(4, 1)), dict(shape=(1, 2))]
for arg in args:
with warnings.catch_warnings(record=True) as warn:
rolling_window(coords, size=2, **arg)
# Filter out the user warnings from some deprecation warnings that
# might be thrown by other packages.
userwarnings = [w for w in warn if issubclass(w.category, UserWarning)]
assert len(userwarnings) == 1
assert issubclass(userwarnings[-1].category, UserWarning)
assert str(userwarnings[-1].message).split()[0] == "Rolling"


def test_spacing_to_shape():
"Check that correct spacing and region are returned"
region = (-10, 0, 0, 5)
Expand Down