Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve array interface of Raster for consistency with np.ma.masked_array operations #302

Merged
merged 19 commits into from
Sep 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repos:
rev: v2.0.0
hooks:
- id: codespell
args: [--ignore-words-list=alos]
args: [--ignore-words-list=alos, --ignore-regex=\bnin\b]
types_or: [python, rst, markdown]
files: ^(geoutils|doc|tests)/

Expand Down
143 changes: 135 additions & 8 deletions geoutils/georaster/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,43 @@

RasterType = TypeVar("RasterType", bound="Raster")

# List of numpy functions that are handled: nan statistics function, normal statistics function and sorting/counting
_HANDLED_FUNCTIONS = (
[
"nansum",
"nanmax",
"nanmin",
"nanargmax",
"nanargmin",
"nanmean",
"nanmedian",
"nanpercentile",
"nanvar",
"nanstd",
"nanprod",
"nancumsum",
"nancumprod",
"nanquantile",
]
+ [
"sum",
"amax",
"amin",
"argmax",
"argmin",
"mean",
"median",
"percentile",
"var",
"std",
"prod",
"cumsum",
"cumprod",
"quantile",
]
+ ["sort", "count_nonzero", "unique"]
)


# Function to set the default nodata values for any given dtype
# Similar to GDAL for int types, but without absurdly long nodata values for floats.
Expand All @@ -64,9 +101,11 @@ def _default_ndv(dtype: str | np.dtype | type) -> int:
"int16": -32768,
"uint32": 99999,
"int32": -99999,
"float16": -99999,
"float32": -99999,
"float64": -99999,
"longdouble": -99999,
"float128": -99999,
"longdouble": -99999, # This is float64 on Windows, float128 on other systems, for compatibility
}
# Check argument dtype is as expected
if not isinstance(dtype, (str, np.dtype, type)):
Expand Down Expand Up @@ -559,12 +598,17 @@ def _overloading_check(
# Case 2 - other is a numpy array
elif isinstance(other, np.ndarray):
# Check that both array have the same shape
if self.data.shape == other.shape:

if len(other.shape) == 2:
rhugonnet marked this conversation as resolved.
Show resolved Hide resolved
other_data = other[np.newaxis, :, :]
else:
other_data = other

if self.data.shape == other_data.shape:
pass
else:
raise ValueError("Both rasters must have the same shape.")

other_data = other
ndv2 = None
dtype2 = other_data.dtype

Expand Down Expand Up @@ -958,12 +1002,95 @@ def copy(self: RasterType, new_array: np.ndarray | None = None) -> RasterType:

return cp

@property
def __array_interface__(self) -> dict[str, Any]:
if not self.is_loaded:
self.load()
def __array__(self) -> np.ndarray:
"""Method to cast np.array() or np.asarray() function directly on Raster classes."""

return self._data

def __array_ufunc__(
self,
ufunc: Callable[[np.ndarray | tuple[np.ndarray, np.ndarray]], np.ndarray | tuple[np.ndarray, np.ndarray]],
method: str,
*inputs: Raster | tuple[Raster, Raster],
**kwargs: Any,
) -> Raster | tuple[Raster, Raster]:
"""
Method to cast NumPy universal functions directly on Raster classes, by passing to the masked array.
This function basically applies the ufunc (with its method and kwargs) to .data, and rebuilds the Raster from
self.__class__. The cases separate the number of input nin and output nout, to properly feed .data and return
Raster objects.
See more details in NumPy doc, e.g., https://numpy.org/doc/stable/user/basics.dispatch.html#basics-dispatch.
"""

# If the universal function takes only one input
if ufunc.nin == 1:
# If the universal function has only one output
if ufunc.nout == 1:
return self.from_array(
data=getattr(ufunc, method)(inputs[0].data, **kwargs), # type: ignore
transform=self.transform,
crs=self.crs,
nodata=self.nodata,
) # type: ignore

# If the universal function has two outputs (Note: no ufunc exists that has three outputs or more)
else:
output = getattr(ufunc, method)(inputs[0].data, **kwargs) # type: ignore
return self.from_array(
data=output[0], transform=self.transform, crs=self.crs, nodata=self.nodata
), self.from_array(data=output[1], transform=self.transform, crs=self.crs, nodata=self.nodata)

# If the universal function takes two inputs (Note: no ufunc exists that has three inputs or more)
else:
if ufunc.nout == 1:
return self.from_array(
data=getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), # type: ignore
transform=self.transform,
crs=self.crs,
nodata=self.nodata,
)

# If the universal function has two outputs (Note: no ufunc exists that has three outputs or more)
else:
output = getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs) # type: ignore
return self.from_array(
data=output[0], transform=self.transform, crs=self.crs, nodata=self.nodata
), self.from_array(data=output[1], transform=self.transform, crs=self.crs, nodata=self.nodata)

def __array_function__(
self, func: Callable[[np.ndarray, Any], Any], types: tuple[type], args: Any, kwargs: Any
) -> Any:
"""
Method to cast NumPy array function directly on a Raster object by applying it to the masked array.
A limited number of function is supported, listed in raster._HANDLED_FUNCTIONS.
"""

# If function is not implemented
if func.__name__ not in _HANDLED_FUNCTIONS:
return NotImplemented

# For subclassing
if not all(issubclass(t, self.__class__) for t in types):
return NotImplemented

# We now choose the behaviour of array functions
# For median, np.median ignores masks of masked array, so we force np.ma.median
if func.__name__ in ["median", "nanmedian"]:
func = np.ma.median
first_arg = args[0].data

# For percentiles and quantiles, there exist no masked array version, so we compute on the valid data directly
elif func.__name__ in ["percentile", "nanpercentile"]:
first_arg = args[0].data.compressed()

elif func.__name__ in ["quantile", "nanquantile"]:
first_arg = args[0].data.compressed()

# Otherwise, we run the numpy function normally (most take masks into account)
else:
first_arg = args[0].data

return self._data.__array_interface__ # type: ignore
return func(first_arg, *args[1:], **kwargs) # type: ignore

# Note the star is needed because of the default argument 'mode' preceding non default arg 'inplace'
# Then the final overload must be duplicated
Expand Down
Loading