Skip to content

Commit

Permalink
Geospatial search api and ready for 1.9.0-rc9 release (#1611)
Browse files Browse the repository at this point in the history
* Use 'geopolygon' instead of 'geometry' for index driver API, for consistency with dc.load

* core api cleanup

* Core cleanup 2.

* Core cleanup 3

* Give index layer responsibility for handling spatial coordinates.

* Cleanup refactor.

* Update whats_new.rst

* lintage.

* Cleanup - mostly mypy.

* Bit more coverage.

* Bit more coverage.

* Oops.  :|

* Update odc-geo requirement.

* Work around change in odc-geo behaviour.

* Update whats_new.rst ready for rc9 pre-release.

* Cleanup based on Ariana's comments.
  • Loading branch information
SpacemanPaul authored Jul 3, 2024
1 parent 13c06d6 commit b012f31
Show file tree
Hide file tree
Showing 22 changed files with 315 additions and 211 deletions.
20 changes: 13 additions & 7 deletions datacube/api/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
from datacube.model import ExtraDimensions, ExtraDimensionSlices, Dataset, Measurement, GridSpec
from datacube.model.utils import xr_apply

from .query import Query, query_group_by, query_geopolygon, GroupBy
from ..index import index_connect, Index
from .query import Query, query_group_by, GroupBy
from ..index import index_connect, Index, extract_geom_from_query
from ..drivers import new_datasource
from ..index.abstract import QueryField
from ..model import QueryField
from ..migration import ODC2DeprecationWarning
from ..storage._load import ProgressFunction, FuserFunction

Expand Down Expand Up @@ -300,6 +300,13 @@ def load(self,
geopolygon=polygon(coords, crs="EPSG:3577")
Or an iterable of polygons (search is done against the union of all polygons:
geopolygon=[poly1, poly2, poly3, ....]
You can also pass a WKT string, or a GeoJSON string or any other object that can be passed to the
odc.geo.Geometry constructor, or an iterable of any of the above.
Performance and accuracy of geopolygon queries may vary depending on the index driver in use and the CRS.
The ``time`` dimension can be specified using a single or tuple of datetime objects or strings with
Expand Down Expand Up @@ -540,8 +547,6 @@ def filter_jan(dataset): return dataset.time.begin.month == 1

measurement_dicts = datacube_product.lookup_measurements(measurements)

# `extra_dims` put last for backwards compability, but should really be the second position
# betwween `grouped` and `geobox`
result = self.load_data(grouped, geobox,
measurement_dicts,
resampling=resampling,
Expand Down Expand Up @@ -614,7 +619,7 @@ def find_datasets_lazy(self,
datasets = self.index.datasets.search(limit=limit,
**query.search_terms)

if query.geopolygon is not None:
if query.geopolygon is not None and not self.index.supports_spatial_indexes:
datasets = select_datasets_inside_polygon(datasets, query.geopolygon)

if ensure_location:
Expand Down Expand Up @@ -1078,7 +1083,7 @@ def output_geobox(like: GeoBox | xarray.Dataset | xarray.DataArray | None = None
# 3. Computed from dataset footprints
# 4. fail with ValueError
if geopolygon is None:
geopolygon = query_geopolygon(**query)
geopolygon = extract_geom_from_query(**query)

if geopolygon is None:
if datasets is None:
Expand All @@ -1105,6 +1110,7 @@ def output_geobox(like: GeoBox | xarray.Dataset | xarray.DataArray | None = None

def select_datasets_inside_polygon(datasets: Iterable[Dataset], polygon: Geometry) -> Iterable[Dataset]:
# Check against the bounding box of the original scene, can throw away some portions
# (Only needed for index drivers without spatial index support)
assert polygon is not None
query_crs = polygon.crs
for dataset in datasets:
Expand Down
110 changes: 27 additions & 83 deletions datacube/api/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,18 @@
import logging
import datetime
import collections
import math
import warnings
from typing import Optional, Union
import pandas

from pandas import to_datetime as pandas_to_datetime
import numpy as np


from ..index import extract_geom_from_query, strip_all_spatial_fields_from_query
from ..model import Range, Dataset
from ..utils.dates import normalise_dt, tz_aware

from odc.geo import CRS, Geometry
from odc.geo.geom import (
lonlat_bounds,
point,
line,
polygon,
mid_longitude,
)
from odc.geo import Geometry
from odc.geo.geom import lonlat_bounds, mid_longitude

_LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -59,8 +52,6 @@ def __init__(self, group_by_func, dimension, units, sort_key=None, group_key=Non
self.group_key = group_key


SPATIAL_KEYS = ('latitude', 'lat', 'y', 'longitude', 'lon', 'long', 'x')
CRS_KEYS = ('crs', 'coordinate_reference_system')
OTHER_KEYS = ('measurements', 'group_by', 'output_crs', 'resolution', 'set_nan', 'product', 'geopolygon', 'like',
'source_filter')

Expand All @@ -79,14 +70,18 @@ def __init__(self, index=None, product=None, geopolygon=None, like=None, **searc
Range(begin=datetime.datetime(2001, 1, 1, 0, 0, tzinfo=tzutc()), \
end=datetime.datetime(2002, 1, 1, 23, 59, 59, 999999, tzinfo=tzutc()))
By passing in an ``index``, the search parameters will be validated as existing on the ``product``.
By passing in an ``index``, the search parameters will be validated as existing on the ``product``,
and a spatial search appropriate for the index driver can be extracted.
Used by :meth:`datacube.Datacube.find_datasets` and :meth:`datacube.Datacube.load`.
:param datacube.index.Index index: An optional `index` object, if checking of field names is desired.
:param str product: name of product
:param geopolygon: spatial bounds of the search
:type geopolygon: geometry.Geometry or None
:type geopolygon: the spatial boundaries of the search, can be:
odc.geo.geom.Geometry: A Geometry object
Any string or JsonLike object that can be converted to a Geometry object.
An iterable of either of the above; or
None: no geopolygon defined (may be derived from like or lat/lon/x/y/crs search terms)
:param xarray.Dataset like: spatio-temporal bounds of `like` are used for the search
:param search_terms:
* `measurements` - list of measurements to retrieve
Expand All @@ -96,15 +91,17 @@ def __init__(self, index=None, product=None, geopolygon=None, like=None, **searc
* `crs` - spatial coordinate reference system to interpret the spatial bounds
* `group_by` - observation grouping method. One of `time`, `solar_day`. Default is `time`
"""
self.index = index
self.product = product
self.geopolygon = query_geopolygon(geopolygon=geopolygon, **search_terms)
self.geopolygon = extract_geom_from_query(geopolygon=geopolygon, **search_terms)
if 'source_filter' in search_terms and search_terms['source_filter'] is not None:
self.source_filter = Query(**search_terms['source_filter'])
else:
self.source_filter = None

remaining_keys = set(search_terms.keys()) - set(SPATIAL_KEYS + CRS_KEYS + OTHER_KEYS)
if index:
search_terms = strip_all_spatial_fields_from_query(search_terms)
remaining_keys = set(search_terms.keys()) - set(OTHER_KEYS)
if self.index:
# Retrieve known keys for extra dimensions
known_dim_keys = set()
if product is not None:
Expand Down Expand Up @@ -149,15 +146,18 @@ def search_terms(self):
kwargs = {}
kwargs.update(self.search)
if self.geopolygon:
geo_bb = lonlat_bounds(self.geopolygon, resolution=100_000) # TODO: pick resolution better
if geo_bb.bottom != geo_bb.top:
kwargs['lat'] = Range(geo_bb.bottom, geo_bb.top)
else:
kwargs['lat'] = geo_bb.bottom
if geo_bb.left != geo_bb.right:
kwargs['lon'] = Range(geo_bb.left, geo_bb.right)
if self.index and self.index.supports_spatial_indexes:
kwargs['geopolygon'] = self.geopolygon
else:
kwargs['lon'] = geo_bb.left
geo_bb = lonlat_bounds(self.geopolygon, resolution="auto")
if math.isclose(geo_bb.bottom, geo_bb.top, abs_tol=1e-5):
kwargs['lat'] = geo_bb.bottom
else:
kwargs['lat'] = Range(geo_bb.bottom, geo_bb.top)
if math.isclose(geo_bb.left, geo_bb.right, abs_tol=1e-5):
kwargs['lon'] = geo_bb.left
else:
kwargs['lon'] = Range(geo_bb.left, geo_bb.right)
if self.product:
kwargs['product'] = self.product
if self.source_filter:
Expand All @@ -177,23 +177,6 @@ def __str__(self):
geopolygon=self.geopolygon)


def query_geopolygon(geopolygon=None, **kwargs):
    """
    Extract a search geometry from spatial query arguments.

    :param geopolygon: an explicit search geometry, or None
    :param kwargs: may contain lat/lon/x/y range arguments and an optional CRS;
        other keys are ignored
    :return: the explicit geopolygon if given, otherwise a geometry built from
        the range arguments (None if no spatial arguments were supplied)
    :raises ValueError: if the CRS is supplied twice, or if both an explicit
        geopolygon and range-style spatial arguments are given
    """
    # Collect any lat/lon/x/y-style range arguments.
    spatial_dims = {k: v for k, v in kwargs.items() if k in SPATIAL_KEYS}
    crs_values = [v for k, v in kwargs.items() if k in CRS_KEYS]
    if len(crs_values) > 1:
        raise ValueError('CRS is supplied twice')
    if crs_values:
        spatial_dims['crs'] = crs_values[0]

    if geopolygon is None:
        # No explicit geometry - derive one from the range arguments (if any).
        return _range_to_geopolygon(**spatial_dims)

    if spatial_dims:
        # Explicit geometry and range arguments are mutually exclusive.
        raise ValueError('Cannot specify "geopolygon" and one of %s at the same time' % (SPATIAL_KEYS + CRS_KEYS,))

    return geopolygon


def _extract_time_from_ds(ds: Dataset) -> datetime.datetime:
return normalise_dt(ds.center_time)

Expand Down Expand Up @@ -245,45 +228,6 @@ def query_group_by(group_by='time', **kwargs):
)


def _range_to_geopolygon(**kwargs):
    """
    Build a Geometry from lat/lon/x/y range-style arguments.

    Latitude-style keys set the top/bottom bounds, longitude-style keys the
    left/right bounds, and a CRS key sets the coordinate reference system
    (defaulting to EPSG:4326).

    :return: a point, line or polygon depending on which axes are degenerate,
        or None if no coordinate values were supplied.
    """
    crs = None
    bounds = {'left': None, 'bottom': None, 'right': None, 'top': None}
    for key, value in kwargs.items():
        if value is None:
            continue
        key = key.lower()
        if key in ['latitude', 'lat', 'y']:
            bounds['top'], bounds['bottom'] = _value_to_range(value)
        if key in ['longitude', 'lon', 'long', 'x']:
            bounds['left'], bounds['right'] = _value_to_range(value)
        if key in ['crs', 'coordinate_reference_system']:
            crs = CRS(value)
    crs = crs or CRS('EPSG:4326')

    if not any(v is not None for v in bounds.values()):
        return None

    left, bottom = bounds['left'], bounds['bottom']
    right, top = bounds['right'], bounds['top']
    x_degenerate = left == right
    y_degenerate = top == bottom

    if x_degenerate and y_degenerate:
        # Single location: a point.
        return point(left, top, crs=crs)
    if x_degenerate:
        # Only a latitude extent: a vertical line.
        return line([(left, bottom), (left, top)], crs=crs)
    if y_degenerate:
        # Only a longitude extent: a horizontal line.
        return line([(left, top), (right, top)], crs=crs)
    # Full bounding box: a closed rectangular polygon.
    return polygon(
        [
            (left, top),
            (right, top),
            (right, bottom),
            (left, bottom),
            (left, top),
        ],
        crs=crs,
    )


def _value_to_range(value):
if isinstance(value, (str, float, int)):
value = float(value)
Expand Down
19 changes: 17 additions & 2 deletions datacube/drivers/netcdf/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,16 @@ def _write_geographical_extents_attributes(nco, extent):
# nco.geospatial_lon_resolution = "{} degrees".format(abs(geobox.affine.a))


class DimensionWrapper:
    """
    Minimal adapter exposing a dimension via a ``.values`` attribute.

    Needed for calling data_resolution_and_offset() from odc-geo 0.4.4,
    which expects an object with a ``.values`` attribute.
    TODO: Remove this code and pin odc-geo if/when this gets fixed there.
    """
    def __init__(self, dim):
        # data_resolution_and_offset() only ever reads this attribute.
        self.values = dim


def create_grid_mapping_variable(nco, crs, name=DEFAULT_GRID_MAPPING):
if crs.geographic:
crs_var = _create_latlon_grid_mapping_variable(nco, crs, name)
Expand All @@ -271,8 +281,13 @@ def create_grid_mapping_variable(nco, crs, name=DEFAULT_GRID_MAPPING):
crs_var.spatial_ref = crs.wkt

dims = crs.dimensions
xres, xoff = data_resolution_and_offset(nco[dims[1]])
yres, yoff = data_resolution_and_offset(nco[dims[0]])
try:
xres, xoff = data_resolution_and_offset(nco[dims[1]])
yres, yoff = data_resolution_and_offset(nco[dims[0]])
except AttributeError:
xres, xoff = data_resolution_and_offset(DimensionWrapper(nco[dims[1]]))
yres, yoff = data_resolution_and_offset(DimensionWrapper(nco[dims[0]]))

crs_var.GeoTransform = [xoff, xres, 0.0, yoff, 0.0, yres]

left, right = nco[dims[1]][0] - 0.5 * xres, nco[dims[1]][-1] + 0.5 * xres
Expand Down
3 changes: 3 additions & 0 deletions datacube/index/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
"""

from ._api import index_connect
from ._spatial import strip_all_spatial_fields_from_query, extract_geom_from_query
from .fields import UnknownFieldError
from .exceptions import DuplicateRecordError, MissingRecordError, IndexSetupError
from datacube.index.abstract import AbstractIndex as Index

__all__ = [
'strip_all_spatial_fields_from_query',
'extract_geom_from_query',
'index_connect',
'Index',

Expand Down
Loading

0 comments on commit b012f31

Please sign in to comment.