Skip to content

Commit

Permalink
Improve array interface of Raster for consistency with `np.ma.maske…
Browse files Browse the repository at this point in the history
…d_array` operations (#302)

* Incremental commit on array interface

* Start tests

* Incremental commit on array interface

* Replace float128 by longdouble

* Incremental commit on array interface

* Finalization of __array_ufunc__ interface and tests

* Add a mask to __array_ufunc__ tests

* Increment commit on array functions

* Finalize array functions and linting

* Add regex ignore codespell on nin (which is a numpy variable, cannot be changed)

* Update descriptions

* Making black and isort happy simultaneously

* Uncomment reflectivity test with np.ndarray and enable 2D broadcast for reflective arithmetic operations

* Account for amaury comments

* Final linting

* Change call to __class__ to from_array

* Change call to __class__ to from_array

* Linting
  • Loading branch information
rhugonnet authored Sep 13, 2022
1 parent abfb127 commit 96c432c
Show file tree
Hide file tree
Showing 3 changed files with 446 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repos:
rev: v2.0.0
hooks:
- id: codespell
args: [--ignore-words-list=alos]
args: [--ignore-words-list=alos, --ignore-regex=\bnin\b]
types_or: [python, rst, markdown]
files: ^(geoutils|doc|tests)/

Expand Down
143 changes: 135 additions & 8 deletions geoutils/georaster/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,43 @@

RasterType = TypeVar("RasterType", bound="Raster")

# List of numpy functions that are handled: nan statistics function, normal statistics function and sorting/counting
_HANDLED_FUNCTIONS = (
[
"nansum",
"nanmax",
"nanmin",
"nanargmax",
"nanargmin",
"nanmean",
"nanmedian",
"nanpercentile",
"nanvar",
"nanstd",
"nanprod",
"nancumsum",
"nancumprod",
"nanquantile",
]
+ [
"sum",
"amax",
"amin",
"argmax",
"argmin",
"mean",
"median",
"percentile",
"var",
"std",
"prod",
"cumsum",
"cumprod",
"quantile",
]
+ ["sort", "count_nonzero", "unique"]
)


# Function to set the default nodata values for any given dtype
# Similar to GDAL for int types, but without absurdly long nodata values for floats.
Expand All @@ -64,9 +101,11 @@ def _default_ndv(dtype: str | np.dtype | type) -> int:
"int16": -32768,
"uint32": 99999,
"int32": -99999,
"float16": -99999,
"float32": -99999,
"float64": -99999,
"longdouble": -99999,
"float128": -99999,
"longdouble": -99999, # This is float64 on Windows, float128 on other systems, for compatibility
}
# Check argument dtype is as expected
if not isinstance(dtype, (str, np.dtype, type)):
Expand Down Expand Up @@ -559,12 +598,17 @@ def _overloading_check(
# Case 2 - other is a numpy array
elif isinstance(other, np.ndarray):
# Check that both array have the same shape
if self.data.shape == other.shape:

if len(other.shape) == 2:
other_data = other[np.newaxis, :, :]
else:
other_data = other

if self.data.shape == other_data.shape:
pass
else:
raise ValueError("Both rasters must have the same shape.")

other_data = other
ndv2 = None
dtype2 = other_data.dtype

Expand Down Expand Up @@ -958,12 +1002,95 @@ def copy(self: RasterType, new_array: np.ndarray | None = None) -> RasterType:

return cp

@property
def __array_interface__(self) -> dict[str, Any]:
if not self.is_loaded:
self.load()
def __array__(self) -> np.ndarray:
"""Method to cast np.array() or np.asarray() function directly on Raster classes."""

return self._data

def __array_ufunc__(
self,
ufunc: Callable[[np.ndarray | tuple[np.ndarray, np.ndarray]], np.ndarray | tuple[np.ndarray, np.ndarray]],
method: str,
*inputs: Raster | tuple[Raster, Raster],
**kwargs: Any,
) -> Raster | tuple[Raster, Raster]:
"""
Method to cast NumPy universal functions directly on Raster classes, by passing to the masked array.
This function basically applies the ufunc (with its method and kwargs) to .data, and rebuilds the Raster from
self.__class__. The cases separate the number of input nin and output nout, to properly feed .data and return
Raster objects.
See more details in NumPy doc, e.g., https://numpy.org/doc/stable/user/basics.dispatch.html#basics-dispatch.
"""

# If the universal function takes only one input
if ufunc.nin == 1:
# If the universal function has only one output
if ufunc.nout == 1:
return self.from_array(
data=getattr(ufunc, method)(inputs[0].data, **kwargs), # type: ignore
transform=self.transform,
crs=self.crs,
nodata=self.nodata,
) # type: ignore

# If the universal function has two outputs (Note: no ufunc exists that has three outputs or more)
else:
output = getattr(ufunc, method)(inputs[0].data, **kwargs) # type: ignore
return self.from_array(
data=output[0], transform=self.transform, crs=self.crs, nodata=self.nodata
), self.from_array(data=output[1], transform=self.transform, crs=self.crs, nodata=self.nodata)

# If the universal function takes two inputs (Note: no ufunc exists that has three inputs or more)
else:
if ufunc.nout == 1:
return self.from_array(
data=getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), # type: ignore
transform=self.transform,
crs=self.crs,
nodata=self.nodata,
)

# If the universal function has two outputs (Note: no ufunc exists that has three outputs or more)
else:
output = getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs) # type: ignore
return self.from_array(
data=output[0], transform=self.transform, crs=self.crs, nodata=self.nodata
), self.from_array(data=output[1], transform=self.transform, crs=self.crs, nodata=self.nodata)

def __array_function__(
self, func: Callable[[np.ndarray, Any], Any], types: tuple[type], args: Any, kwargs: Any
) -> Any:
"""
Method to cast NumPy array function directly on a Raster object by applying it to the masked array.
A limited number of function is supported, listed in raster._HANDLED_FUNCTIONS.
"""

# If function is not implemented
if func.__name__ not in _HANDLED_FUNCTIONS:
return NotImplemented

# For subclassing
if not all(issubclass(t, self.__class__) for t in types):
return NotImplemented

# We now choose the behaviour of array functions
# For median, np.median ignores masks of masked array, so we force np.ma.median
if func.__name__ in ["median", "nanmedian"]:
func = np.ma.median
first_arg = args[0].data

# For percentiles and quantiles, there exist no masked array version, so we compute on the valid data directly
elif func.__name__ in ["percentile", "nanpercentile"]:
first_arg = args[0].data.compressed()

elif func.__name__ in ["quantile", "nanquantile"]:
first_arg = args[0].data.compressed()

# Otherwise, we run the numpy function normally (most take masks into account)
else:
first_arg = args[0].data

return self._data.__array_interface__ # type: ignore
return func(first_arg, *args[1:], **kwargs) # type: ignore

# Note the star is needed because of the default argument 'mode' preceding non default arg 'inplace'
# Then the final overload must be duplicated
Expand Down
Loading

0 comments on commit 96c432c

Please sign in to comment.