Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter spatial #170

Merged
merged 33 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
0949214
spatial filter
masawdah Oct 2, 2023
64f09c2
spatial filter
masawdah Oct 2, 2023
7e99c39
improvement
masawdah Oct 3, 2023
c55a392
name dimesnions
masawdah Oct 3, 2023
36ced98
remove compute
masawdah Oct 3, 2023
39e38cc
handle CRS
masawdah Oct 4, 2023
e91b671
support feature collection
masawdah Oct 6, 2023
c819c4e
clean up
masawdah Oct 6, 2023
6ceacca
updated filter
masawdah Oct 9, 2023
a5e4c18
pytest filter_spatial
masawdah Oct 9, 2023
3dd0187
reformatted files
masawdah Oct 9, 2023
e3024a8
pre-check
masawdah Oct 9, 2023
3779303
pre-check
masawdah Oct 9, 2023
aa1da6c
remove parcels
masawdah Oct 10, 2023
423219e
Merge branch 'Open-EO:main' into filter_spatial
masawdah Oct 10, 2023
946b6ca
mask_polygon
masawdah Oct 12, 2023
a90411c
mask_polygon
masawdah Oct 13, 2023
6b7d4e3
pytest mask_polygon
masawdah Oct 13, 2023
fa426a3
add parameters to mask_polygon
masawdah Oct 13, 2023
3eb3e48
add parameters to mask_polygon
masawdah Oct 13, 2023
b4a38e7
update __init__ file
masawdah Oct 18, 2023
beaf891
mask parameter
masawdah Oct 19, 2023
a390f1f
automated expand mask dimension
masawdah Oct 19, 2023
204f720
update pytest mask_polygon
masawdah Oct 19, 2023
8500a5f
mask_polygon process
masawdah Oct 19, 2023
6bad2bf
mask_polygon
masawdah Oct 19, 2023
0f53f29
update mask_polygon
masawdah Oct 19, 2023
0b5aa1c
update mask_polygon
masawdah Oct 19, 2023
66d4dcb
final check mask_polygon
masawdah Oct 19, 2023
e851931
final check mask_polygon
masawdah Oct 19, 2023
4f013d4
clean up
masawdah Oct 30, 2023
fd23a61
clean up
masawdah Oct 30, 2023
e2f70ba
clean the load.py
masawdah Oct 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
from .general import *
from .indices import *
from .load import *
from .mask_polygon import *
from .merge import *
from .reduce import *
38 changes: 37 additions & 1 deletion openeo_processes_dask/process_implementations/cubes/_filter.py
GeraldIr marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
import json
import logging
import warnings
from typing import Callable

import dask.array as da
import geopandas as gpd
import numpy as np
import pyproj
import rasterio
import rioxarray
import shapely
import xarray as xr
from openeo_pg_parser_networkx.pg_schema import BoundingBox, TemporalInterval

from openeo_processes_dask.process_implementations.cubes.mask_polygon import (
mask_polygon,
)
from openeo_processes_dask.process_implementations.data_model import RasterCube
from openeo_processes_dask.process_implementations.exceptions import (
BandFilterParameterMissing,
Expand All @@ -16,9 +24,18 @@
TooManyDimensions,
)

DEFAULT_CRS = "EPSG:4326"


logger = logging.getLogger(__name__)

__all__ = ["filter_labels", "filter_temporal", "filter_bands", "filter_bbox"]
__all__ = [
"filter_labels",
"filter_temporal",
"filter_bands",
"filter_bbox",
"filter_spatial",
]


def filter_temporal(
Expand Down Expand Up @@ -102,6 +119,25 @@
return data


def filter_spatial(data: RasterCube, geometries) -> RasterCube:
if "type" in geometries and geometries["type"] == "FeatureCollection":
gdf = gpd.GeoDataFrame.from_features(geometries, DEFAULT_CRS)
elif "type" in geometries and geometries["type"] in ["Polygon"]:
polygon = shapely.geometry.Polygon(geometries["coordinates"][0])
gdf = gpd.GeoDataFrame(geometry=[polygon])
gdf.crs = DEFAULT_CRS

Check warning on line 128 in openeo_processes_dask/process_implementations/cubes/_filter.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/_filter.py#L125-L128

Added lines #L125 - L128 were not covered by tests

bbox = gdf.total_bounds
spatial_extent = BoundingBox(
west=bbox[0], east=bbox[2], south=bbox[1], north=bbox[3]
)

data = filter_bbox(data, spatial_extent)
data = mask_polygon(data, geometries)

return data


def filter_bbox(data: RasterCube, extent: BoundingBox) -> RasterCube:
try:
input_crs = str(data.rio.crs)
Expand Down
165 changes: 165 additions & 0 deletions openeo_processes_dask/process_implementations/cubes/mask_polygon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import json
import logging
from typing import Any, Union

import dask.array as da
import geopandas as gpd
import numpy as np
import rasterio
import rioxarray
import shapely
import xarray as xr
from xarray.core import dtypes

from openeo_processes_dask.process_implementations.data_model import (
RasterCube,
VectorCube,
)

DEFAULT_CRS = "EPSG:4326"


logger = logging.getLogger(__name__)

__all__ = [
"mask_polygon",
]


def mask_polygon(
data: RasterCube,
mask: Union[VectorCube, str],
replacement: Any = dtypes.NA,
inside: bool = True,
) -> RasterCube:
y_dim = data.openeo.y_dim
x_dim = data.openeo.x_dim
t_dim = data.openeo.temporal_dims
b_dim = data.openeo.band_dims

if len(t_dim) == 0:
t_dim = None

Check warning on line 41 in openeo_processes_dask/process_implementations/cubes/mask_polygon.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/mask_polygon.py#L41

Added line #L41 was not covered by tests
else:
t_dim = t_dim[0]
if len(b_dim) == 0:
b_dim = None

Check warning on line 45 in openeo_processes_dask/process_implementations/cubes/mask_polygon.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/mask_polygon.py#L45

Added line #L45 was not covered by tests
else:
b_dim = b_dim[0]

y_dim_size = data.sizes[y_dim]
x_dim_size = data.sizes[x_dim]

# Reproject vector data to match the raster data cube.
## Get the CRS of data cube
try:
data_crs = str(data.rio.crs)
except Exception as e:
raise Exception(f"Not possible to estimate the input data projection! {e}")

Check warning on line 57 in openeo_processes_dask/process_implementations/cubes/mask_polygon.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/mask_polygon.py#L56-L57

Added lines #L56 - L57 were not covered by tests

data = data.rio.set_crs(data_crs)

## Reproject vector data if the input vector data is Polygon or Multi Polygon
if "type" in mask and mask["type"] == "FeatureCollection":
geometries = gpd.GeoDataFrame.from_features(mask, DEFAULT_CRS)
geometries = geometries.to_crs(data_crs)
geometries = geometries.to_json()
geometries = json.loads(geometries)
elif "type" in mask and mask["type"] in ["Polygon"]:
polygon = shapely.geometry.Polygon(mask["coordinates"][0])
geometries = gpd.GeoDataFrame(geometry=[polygon])
geometries.crs = DEFAULT_CRS
geometries = geometries.to_crs(data_crs)
geometries = geometries.to_json()
geometries = json.loads(geometries)

Check warning on line 73 in openeo_processes_dask/process_implementations/cubes/mask_polygon.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/mask_polygon.py#L67-L73

Added lines #L67 - L73 were not covered by tests
else:
raise ValueError(

Check warning on line 75 in openeo_processes_dask/process_implementations/cubes/mask_polygon.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/mask_polygon.py#L75

Added line #L75 was not covered by tests
"Unsupported or missing geometry type. Expected 'Polygon' or 'FeatureCollection'."
)

data_dims = list(data.dims)

# Get the Affine transformer
transform = data.rio.transform()

# Initialize an empty mask
# Set the same chunk size as the input data
data_chunks = {}
chunks_shapes = data.chunks
for i, d in enumerate(data_dims):
data_chunks[d] = chunks_shapes[i][0]

if data_dims.index(x_dim[0]) < data_dims.index(y_dim[0]):
final_mask = da.zeros(
(x_dim_size, y_dim_size),
chunks={x_dim: data_chunks[x_dim], y_dim: data_chunks[y_dim]},
dtype=bool,
)

dask_out_shape = da.from_array(
(x_dim_size, y_dim_size),
chunks={x_dim: data_chunks[x_dim], y_dim: data_chunks[y_dim]},
)
else:
final_mask = da.zeros(

Check warning on line 103 in openeo_processes_dask/process_implementations/cubes/mask_polygon.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/mask_polygon.py#L103

Added line #L103 was not covered by tests
(y_dim_size, x_dim_size),
chunks={y_dim: data_chunks[y_dim], x_dim: data_chunks[x_dim]},
dtype=bool,
)

dask_out_shape = da.from_array(

Check warning on line 109 in openeo_processes_dask/process_implementations/cubes/mask_polygon.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/mask_polygon.py#L109

Added line #L109 was not covered by tests
(y_dim_size, x_dim_size),
chunks={y_dim: data_chunks[y_dim], x_dim: data_chunks[x_dim]},
)

# CHECK IF the input single polygon or multiple Polygons
if "type" in geometries and geometries["type"] == "FeatureCollection":
for feature in geometries["features"]:
polygon = shapely.geometry.Polygon(feature["geometry"]["coordinates"][0])

# Create a GeoSeries from the geometry
geo_series = gpd.GeoSeries(polygon)

# Convert the GeoSeries to a GeometryArray
geometry_array = geo_series.geometry.array

mask = da.map_blocks(
rasterio.features.geometry_mask,
geometry_array,
transform=transform,
out_shape=dask_out_shape,
dtype=bool,
invert=inside,
)
final_mask |= mask

elif "type" in geometries and geometries["type"] in ["Polygon"]:
polygon = shapely.geometry.Polygon(geometries["coordinates"][0])
geo_series = gpd.GeoSeries(polygon)

Check warning on line 137 in openeo_processes_dask/process_implementations/cubes/mask_polygon.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/mask_polygon.py#L135-L137

Added lines #L135 - L137 were not covered by tests

# Convert the GeoSeries to a GeometryArray
geometry_array = geo_series.geometry.array
mask = da.map_blocks(

Check warning on line 141 in openeo_processes_dask/process_implementations/cubes/mask_polygon.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/mask_polygon.py#L140-L141

Added lines #L140 - L141 were not covered by tests
rasterio.features.geometry_mask,
geometry_array,
transform=transform,
out_shape=dask_out_shape,
dtype=bool,
invert=inside,
)
final_mask |= mask

Check warning on line 149 in openeo_processes_dask/process_implementations/cubes/mask_polygon.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/mask_polygon.py#L149

Added line #L149 was not covered by tests

masked_dims = len(final_mask.shape)

diff_axes = []
for axis in range(len(data_dims)):
try:
if final_mask.shape[axis] != data.shape[axis]:
diff_axes.append(axis)

Check warning on line 157 in openeo_processes_dask/process_implementations/cubes/mask_polygon.py

View check run for this annotation

Codecov / codecov/patch

openeo_processes_dask/process_implementations/cubes/mask_polygon.py#L157

Added line #L157 was not covered by tests
except:
if len(diff_axes) < (len(data_dims) - 2):
diff_axes.append(axis)

final_mask = np.expand_dims(final_mask, axis=diff_axes)
filtered_ds = data.where(final_mask, other=replacement)

return filtered_ds
25 changes: 25 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import importlib
import inspect
import json
import logging

import dask_geopandas
Expand All @@ -14,6 +15,7 @@
BoundingBox,
TemporalInterval,
)
from shapely.geometry import Polygon

from openeo_processes_dask.process_implementations.core import process
from openeo_processes_dask.process_implementations.data_model import VectorCube
Expand Down Expand Up @@ -68,6 +70,29 @@ def bounding_box_small(
return BoundingBox.parse_obj(spatial_extent)


@pytest.fixture
def polygon_geometry_small(
west=10.47, east=10.48, south=46.12, north=46.18, crs="EPSG:4326"
):
# Bounding box coordinates
west, east, south, north = 10.47, 10.48, 46.12, 46.18

# Create a small polygon
geometry = [
Polygon(
[(west, south), (west, north), (east, north), (east, south), (west, south)]
)
]

# Create a GeoDataFrame with a single polygon and default CRS 'wgs84'
gdf = gpd.GeoDataFrame(geometry=geometry, crs=crs)

geometries = gdf.to_json()
geometries = json.loads(geometries)

return geometries


@pytest.fixture
def temporal_interval(interval=["2018-05-01", "2018-06-01"]) -> TemporalInterval:
return TemporalInterval.parse_obj(interval)
Expand Down
23 changes: 23 additions & 0 deletions tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from openeo_processes_dask.process_implementations.cubes._filter import (
filter_bands,
filter_bbox,
filter_spatial,
filter_temporal,
)
from openeo_processes_dask.process_implementations.cubes.reduce import reduce_dimension
Expand Down Expand Up @@ -83,6 +84,28 @@ def test_filter_bands(temporal_interval, bounding_box, random_raster_data):
assert output_cube["bands"].values == "SCL"


@pytest.mark.parametrize("size", [(30, 30, 30, 1)])
@pytest.mark.parametrize("dtype", [np.uint8])
def test_filter_spatial(
temporal_interval,
bounding_box,
random_raster_data,
polygon_geometry_small,
):
input_cube = create_fake_rastercube(
data=random_raster_data,
spatial_extent=bounding_box,
temporal_extent=temporal_interval,
bands=["B02"],
backend="dask",
)

output_cube = filter_spatial(data=input_cube, geometries=polygon_geometry_small)

assert len(output_cube.y) < len(input_cube.y)
assert len(output_cube.x) < len(input_cube.x)


@pytest.mark.parametrize("size", [(30, 30, 1, 1)])
@pytest.mark.parametrize("dtype", [np.uint8])
def test_filter_bbox(
Expand Down
31 changes: 31 additions & 0 deletions tests/test_mask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np
import pytest
from openeo_pg_parser_networkx.pg_schema import TemporalInterval

from openeo_processes_dask.process_implementations.cubes.mask_polygon import (
mask_polygon,
)
from tests.mockdata import create_fake_rastercube


@pytest.mark.parametrize("size", [(30, 30, 30, 1)])
@pytest.mark.parametrize("dtype", [np.uint8])
def test_mask_polygon(
temporal_interval,
bounding_box,
random_raster_data,
polygon_geometry_small,
):
input_cube = create_fake_rastercube(
data=random_raster_data,
spatial_extent=bounding_box,
temporal_extent=temporal_interval,
bands=["B02"],
backend="dask",
)

output_cube = mask_polygon(data=input_cube, mask=polygon_geometry_small)

assert np.isnan(output_cube).sum() > np.isnan(input_cube).sum()
assert len(output_cube.y) == len(input_cube.y)
assert len(output_cube.x) == len(input_cube.x)