From d90bfa8d98570846300164e462643117fe4728dd Mon Sep 17 00:00:00 2001 From: rhugonne Date: Tue, 6 Sep 2022 21:08:48 +0200 Subject: [PATCH 01/18] Incremental commit on array interface --- geoutils/georaster/raster.py | 30 +++++++++++++++++++++++++++--- tests/test_georaster.py | 1 + 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index 82ce7f5e..bada40b9 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -939,12 +939,36 @@ def copy(self: RasterType, new_array: np.ndarray | None = None) -> RasterType: return cp - @property - def __array_interface__(self) -> dict[str, Any]: + # Not sure yet how this behaves with __array__, __array_ufunc__ and __array_function__ implemented + # @property + # def __array_interface__(self) -> dict[str, Any]: + # if not self.is_loaded: + # self.load() + # + # return self._data.__array_interface__ # type: ignore + + # Set up array interface for our class container + # Inspired from https://numpy.org/doc/stable/user/basics.dispatch.html#basics-dispatch + def __array__(self): + + return self._data + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> RasterType: + + if not self.is_loaded: + self.load() + + return self.__class__({"data": ufunc(self._data, *inputs, **kwargs), + "transform": self.transform, + "crs": self.crs, + "nodata": self.nodata}) # type: ignore + + def __array_function__(self, func, types, args, kwargs): + if not self.is_loaded: self.load() - return self._data.__array_interface__ # type: ignore + return func(self._data, *kwargs) # type: ignore # Note the star is needed because of the default argument 'mode' preceding non default arg 'inplace' # Then the final overload must be duplicated diff --git a/tests/test_georaster.py b/tests/test_georaster.py index 33c0dec5..3a83c1b2 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -39,6 +39,7 @@ class TestRaster: def test_init(self, example: str) -> None: """Test that all possible inputs work properly in Raster class init""" + example = aster_dem_path # First, filename r = gr.Raster(example) assert isinstance(r, gr.Raster) From 147db6755a8b7a510f5b304a00c64030867c6d70 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Tue, 6 Sep 2022 22:46:00 +0200 Subject: [PATCH 02/18] Start tests --- tests/test_georaster.py | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/tests/test_georaster.py b/tests/test_georaster.py index 3a83c1b2..43e58f3b 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -1352,7 +1352,7 @@ def test_numpy_functions(dtype: str) -> None: assert np.median(raster) == 26.0 -class TestsArithmetic: +class TestArithmetic: """ Test that all arithmetic overloading functions work as expected. """ @@ -1609,7 +1609,7 @@ def test_reflectivity(self, ops: list[str]) -> None: @classmethod def from_array( - cls: type[TestsArithmetic], + cls: type[TestArithmetic], data: np.ndarray | np.ma.masked_array, rst_ref: gr.RasterType, nodata: int | float | list[int] | list[float] | None = None, @@ -1713,3 +1713,32 @@ def test_power(self, power: float | int) -> None: if power > 0: # Integers to negative integer powers are not allowed. assert self.r1**power == self.from_array(self.r1.data**power, rst_ref=self.r1) assert self.r1_f32**power == self.from_array(self.r1_f32.data**power, rst_ref=self.r1_f32) + + +class TestArrayInterface: + """Test that the array interface of Raster works as expected for ufuncs and array functions""" + + # All universal functions of NumPy, about 90 in 2022. See list: https://numpy.org/doc/stable/reference/ufuncs.html + ufuncs = [ufunc for ufunc in np.core.umath.__all__ if (ufunc[0] != '_' and ufunc.islower())] + + @pytest.mark.parametrize("ufunc", ufuncs) + @pytest.mark.parametrize("dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", + "float32", "float64", "float128"]) + @pytest.mark.parametrize("nodata_init", [None, "type_default"]) + def test_array_ufunc(self, ufunc, nodata_init, dtype): + """Test that ufuncs consistently return the same result as for the np.ma.masked_array""" + + if nodata_init == "type_default": + nodata: int | None = _default_ndv(dtype) + else: + nodata = None + + width = height = 5 + transform = rio.transform.from_bounds(0, 0, 1, 1, width, height) + np.random.seed(42) + r1 = gr.Raster.from_array(np.random.randint(1, 255, (height, width), dtype="uint8"), transform=transform, + crs=None) + r2 = gr.Raster.from_array(np.random.randint(1, 255, (height, width), dtype="uint8"), transform=transform, + crs=None) + + From edb967cfd2850bf264aa42c151416f17305cb3d4 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Tue, 6 Sep 2022 23:47:18 +0200 Subject: [PATCH 03/18] Incremental commit on array interface --- geoutils/georaster/raster.py | 18 ++++++++++++------ tests/test_georaster.py | 33 +++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index bada40b9..cbc60c60 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -895,7 +895,7 @@ def info(self, stats: bool = False) -> str: f"Size: {self.width}, {self.height}\n", f"Number of bands: {self.count:d}\n", f"Data types: {self.dtypes}\n", - f"Coordinate System: EPSG:{self.crs.to_epsg()}\n", + f"Coordinate System: EPSG:{[self.crs.to_string() if self.crs is not None else None]}\n", f"NoData Value: {self.nodata}\n", "Pixel Size: {}, {}\n".format(*self.res), "Upper Left Corner: {}, {}\n".format(*self.bounds[:2]), @@ -943,7 +943,7 @@ def copy(self: RasterType, new_array: np.ndarray | None = None) -> RasterType: # @property # def __array_interface__(self) -> dict[str, Any]: # if not self.is_loaded: - # self.load() + # self.load()__array # # return self._data.__array_interface__ # type: ignore @@ -958,10 +958,16 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> RasterType: if not self.is_loaded: self.load() - return self.__class__({"data": ufunc(self._data, *inputs, **kwargs), - "transform": self.transform, - "crs": self.crs, - "nodata": self.nodata}) # type: ignore + if ufunc.nin == 1: + return self.__class__({"data": getattr(ufunc, method)(inputs[0].data, **kwargs), + "transform": self.transform, + "crs": self.crs, + "nodata": self.nodata}) + else: + return self.__class__({"data": getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), + "transform": self.transform, + "crs": self.crs, + "nodata": self.nodata}) def __array_function__(self, func, types, args, kwargs): diff --git a/tests/test_georaster.py b/tests/test_georaster.py index 43e58f3b..5d31a237 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -1719,26 +1719,39 @@ class TestArrayInterface: """Test that the array interface of Raster works as expected for ufuncs and array functions""" # All universal functions of NumPy, about 90 in 2022. See list: https://numpy.org/doc/stable/reference/ufuncs.html - ufuncs = [ufunc for ufunc in np.core.umath.__all__ if (ufunc[0] != '_' and ufunc.islower())] + ufuncs_str = [ufunc for ufunc in np.core.umath.__all__ if (ufunc[0] != '_' and ufunc.islower() and 'seterr' not in ufunc)] - @pytest.mark.parametrize("ufunc", ufuncs) + max_val = np.iinfo("int32").max + min_val = np.iinfo("int32").min + # We create two random array of varying dtype + np.random.seed(42) + width = height = 5 + transform = rio.transform.from_bounds(0, 0, 1, 1, width, height) + arr1 = np.random.randint(min_val, max_val, (height, width), dtype="int32") + np.random.normal(size=(height, width)) + arr2 = np.random.randint(min_val, max_val, (height, width), dtype="int32") + np.random.normal(size=(height, width)) + + @pytest.mark.parametrize("ufunc_str", ufuncs_str) @pytest.mark.parametrize("dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64", "float128"]) @pytest.mark.parametrize("nodata_init", [None, "type_default"]) - def test_array_ufunc(self, ufunc, nodata_init, dtype): + def test_array_ufunc(self, ufunc_str: str, nodata_init: None | str, dtype: str): """Test that ufuncs consistently return the same result as for the np.ma.masked_array""" + # We set the default nodata if nodata_init == "type_default": nodata: int | None = _default_ndv(dtype) else: nodata = None - width = height = 5 - transform = rio.transform.from_bounds(0, 0, 1, 1, width, height) - np.random.seed(42) - r1 = gr.Raster.from_array(np.random.randint(1, 255, (height, width), dtype="uint8"), transform=transform, - crs=None) - r2 = gr.Raster.from_array(np.random.randint(1, 255, (height, width), dtype="uint8"), transform=transform, - crs=None) + r1 = gr.Raster.from_array(self.arr1.astype(dtype), transform=self.transform, crs=None, nodata=nodata) + r2 = gr.Raster.from_array(self.arr2.astype(dtype), transform=self.transform, crs=None, nodata=nodata) + + ufunc = getattr(np, ufunc_str) + # If ufunc takes only one argument + if ufunc.nin == 1: + assert np.ma.allequal(ufunc(r1.data, casting="unsafe"), ufunc(r1, casting="unsafe").data) + elif ufunc.nin == 2: + assert np.ma.allequal(ufunc(r1.data, r2.data), ufunc(r1, r2).data) + From befc4a8a5e7ae2ccadebc30b2ac82ba853878bc5 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Wed, 7 Sep 2022 00:09:44 +0200 Subject: [PATCH 04/18] Replace float128 by longdouble --- tests/test_georaster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_georaster.py b/tests/test_georaster.py index 5d31a237..2b354e3f 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -922,7 +922,7 @@ def test_default_ndv(self) -> None: assert _default_ndv("uint16") == np.iinfo("uint16").max assert _default_ndv("int16") == np.iinfo("int16").min assert _default_ndv("uint32") == 99999 - for dtype in ["int32", "float32", "float64", "float128"]: + for dtype in ["int32", "float32", "float64", "longdouble"]: assert _default_ndv(dtype) == -99999 # Check it works with most frequent np.dtypes too @@ -1732,7 +1732,7 @@ class TestArrayInterface: @pytest.mark.parametrize("ufunc_str", ufuncs_str) @pytest.mark.parametrize("dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", - "float32", "float64", "float128"]) + "float32", "float64", "longdouble"]) @pytest.mark.parametrize("nodata_init", [None, "type_default"]) def test_array_ufunc(self, ufunc_str: str, nodata_init: None | str, dtype: str): """Test that ufuncs consistently return the same result as for the np.ma.masked_array""" From 9a150cb384dd6bdf6fc22599dcfa1a6b8111e8f7 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Wed, 7 Sep 2022 11:26:42 +0200 Subject: [PATCH 05/18] Incremental commit on array interface --- tests/test_georaster.py | 84 +++++++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 16 deletions(-) diff --git a/tests/test_georaster.py b/tests/test_georaster.py index d53fe9ee..a86ec21a 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -40,7 +40,6 @@ class TestRaster: def test_init(self, example: str) -> None: """Test that all possible inputs work properly in Raster class init""" - example = aster_dem_path # First, filename r = gr.Raster(example) assert isinstance(r, gr.Raster) @@ -1920,23 +1919,31 @@ class TestArrayInterface: """Test that the array interface of Raster works as expected for ufuncs and array functions""" # All universal functions of NumPy, about 90 in 2022. See list: https://numpy.org/doc/stable/reference/ufuncs.html - ufuncs_str = [ufunc for ufunc in np.core.umath.__all__ if (ufunc[0] != '_' and ufunc.islower() and 'seterr' not in ufunc)] + ufuncs_str = [ufunc for ufunc in np.core.umath.__all__ + if (ufunc[0] != '_' and ufunc.islower() + and 'err' not in ufunc and ufunc not in ['e', 'pi', 'frompyfunc', 'euler_gamma'])] - max_val = np.iinfo("int32").max - min_val = np.iinfo("int32").min - # We create two random array of varying dtype - np.random.seed(42) + # Universal functions with single input argument + ufuncs_str_one_nin = [ufunc for ufunc in ufuncs_str if getattr(np, ufunc).nin == 1] + + # Universal functions with two input arguments + ufuncs_str_two_nin = [ufunc for ufunc in ufuncs_str if getattr(np, ufunc).nin == 2] + + # We create two random array of varying dtype width = height = 5 + min_val = np.iinfo("int32").min + max_val = np.iinfo("int32").max transform = rio.transform.from_bounds(0, 0, 1, 1, width, height) + np.random.seed(42) arr1 = np.random.randint(min_val, max_val, (height, width), dtype="int32") + np.random.normal(size=(height, width)) arr2 = np.random.randint(min_val, max_val, (height, width), dtype="int32") + np.random.normal(size=(height, width)) - @pytest.mark.parametrize("ufunc_str", ufuncs_str) + @pytest.mark.parametrize("ufunc_str", ufuncs_str_one_nin) @pytest.mark.parametrize("dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64", "longdouble"]) @pytest.mark.parametrize("nodata_init", [None, "type_default"]) - def test_array_ufunc(self, ufunc_str: str, nodata_init: None | str, dtype: str): - """Test that ufuncs consistently return the same result as for the np.ma.masked_array""" + def test_array_ufunc_one_nin(self, ufunc_str: str, nodata_init: None | str, dtype: str): + """Test that ufuncs with a single input argument consistently return the same result as for masked arrays.""" # We set the default nodata if nodata_init == "type_default": @@ -1944,15 +1951,60 @@ def test_array_ufunc(self, ufunc_str: str, nodata_init: None | str, dtype: str): else: nodata = None - r1 = gr.Raster.from_array(self.arr1.astype(dtype), transform=self.transform, crs=None, nodata=nodata) - r2 = gr.Raster.from_array(self.arr2.astype(dtype), transform=self.transform, crs=None, nodata=nodata) + # Create Raster + rst = gr.Raster.from_array(self.arr1.astype(dtype), transform=self.transform, crs=None, nodata=nodata) + + # Get ufunc + ufunc = getattr(np, ufunc_str) + + # Check if our input dtype is possible on this ufunc, if yes check that outputs are identical + if dtype in [str(np.dtype(t[0])) for t in ufunc.types]: + assert np.ma.allequal(ufunc(rst.data), ufunc(rst).data) + # If the input dtype is not possible, check that NumPy raises a TypeError + else: + with pytest.raises(TypeError): + ufunc(rst.data) + with pytest.raises(TypeError): + ufunc(rst) + + + @pytest.mark.parametrize("ufunc_str", ufuncs_str_two_nin) + @pytest.mark.parametrize("dtype1", ["uint8", "int8", "uint16", "int16", "uint32", "int32", + "float32", "float64", "longdouble"]) + @pytest.mark.parametrize("dtype2", ["uint8", "int8", "uint16", "int16", "uint32", "int32", + "float32", "float64", "longdouble"]) + @pytest.mark.parametrize("nodata1_init", [None, "type_default"]) + @pytest.mark.parametrize("nodata2_init", [None, "type_default"]) + def test_array_ufunc_two_nin(self, ufunc_str: str, nodata1_init: None | str, nodata2_init: str, dtype1: str, dtype2: str): + """Test that ufuncs with a single input argument consistently return the same result as for masked arrays.""" + + # We set the default nodatas + if nodata1_init == "type_default": + nodata1: int | None = _default_ndv(dtype1) + else: + nodata1 = None + if nodata2_init == "type_default": + nodata2: int | None = _default_ndv(dtype2) + else: + nodata2 = None + + rst1 = gr.Raster.from_array(self.arr1.astype(dtype1), transform=self.transform, crs=None, nodata=nodata1) + rst2 = gr.Raster.from_array(self.arr2.astype(dtype2), transform=self.transform, crs=None, nodata=nodata2) ufunc = getattr(np, ufunc_str) - # If ufunc takes only one argument - if ufunc.nin == 1: - assert np.ma.allequal(ufunc(r1.data, casting="unsafe"), ufunc(r1, casting="unsafe").data) - elif ufunc.nin == 2: - assert np.ma.allequal(ufunc(r1.data, r2.data), ufunc(r1, r2).data) + + # Check if both our input dtypes are possible on this ufunc, if yes check that outputs are identical + if (dtype1, dtype2) in [(str(np.dtype(t[0])), str(np.dtype(t[1]))) for t in ufunc.types]: + assert np.ma.allequal(ufunc(rst1.data, rst2.data, casting="unsafe"), ufunc(rst1, rst2, casting="unsafe").data) + else: + print('Ufunc '+ufunc_str+' should fail with dtypes '+str(dtype1)+" and "+str(dtype2)) + + with pytest.raises(TypeError): + ufunc(rst1.data, rst2.data, casting="unsafe") + with pytest.raises(TypeError): + ufunc(rst1, rst2, casting="unsafe") + + From 674750fd98aefec2b15c640de9cdc205e73d8244 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Wed, 7 Sep 2022 14:27:40 +0200 Subject: [PATCH 06/18] Finalization of __array_ufunc__ interface and tests --- geoutils/georaster/raster.py | 54 ++++++++++++--- tests/test_georaster.py | 128 +++++++++++++++++++++++++++-------- 2 files changed, 143 insertions(+), 39 deletions(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index 83dfe212..a9b56d58 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -64,9 +64,11 @@ def _default_ndv(dtype: str | np.dtype | type) -> int: "int16": -32768, "uint32": 99999, "int32": -99999, + "float16": -99999, "float32": -99999, "float64": -99999, - "longdouble": -99999, + "float128": -99999, + "longdouble": -99999, # This is float64 on Windows, float128 on other systems, for compatibility } # Check argument dtype is as expected if not isinstance(dtype, (str, np.dtype, type)): @@ -972,21 +974,51 @@ def __array__(self): return self._data - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> RasterType: + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> RasterType | tuple[RasterType, RasterType]: if not self.is_loaded: self.load() + # If the universal function takes only one input if ufunc.nin == 1: - return self.__class__({"data": getattr(ufunc, method)(inputs[0].data, **kwargs), - "transform": self.transform, - "crs": self.crs, - "nodata": self.nodata}) - else: - return self.__class__({"data": getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), - "transform": self.transform, - "crs": self.crs, - "nodata": self.nodata}) + # If the universal function has only one output + if ufunc.nout == 1: + return self.__class__({"data": getattr(ufunc, method)(inputs[0].data, **kwargs), + "transform": self.transform, + "crs": self.crs, + "nodata": self.nodata}) + + # If the universal function has two outputs (Note: no ufunc exists that has three outputs or more) + elif ufunc.nout ==2: + output = getattr(ufunc, method)(inputs[0].data, **kwargs) + return self.__class__({"data": output[0], + "transform": self.transform, + "crs": self.crs, + "nodata": self.nodata}), \ + self.__class__({"data": output[1], + "transform": self.transform, + "crs": self.crs, + "nodata": self.nodata}) + + # If the universal function takes two inputs (Note: no ufunc exists that has three inputs or more) + elif ufunc.nin == 2: + if ufunc.nout == 1: + return self.__class__({"data": getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), + "transform": self.transform, + "crs": self.crs, + "nodata": self.nodata}) + + # If the universal function has two outputs (Note: no ufunc exists that has three outputs or more) + elif ufunc.nout == 2: + output = getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs) + return self.__class__({"data": output[0], + "transform": self.transform, + "crs": self.crs, + "nodata": self.nodata}),\ + self.__class__({"data": output[1], + "transform": self.transform, + "crs": self.crs, + "nodata": self.nodata}) def __array_function__(self, func, types, args, kwargs): diff --git a/tests/test_georaster.py b/tests/test_georaster.py index a86ec21a..51622541 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -1923,11 +1923,32 @@ class TestArrayInterface: if (ufunc[0] != '_' and ufunc.islower() and 'err' not in ufunc and ufunc not in ['e', 'pi', 'frompyfunc', 'euler_gamma'])] - # Universal functions with single input argument - ufuncs_str_one_nin = [ufunc for ufunc in ufuncs_str if getattr(np, ufunc).nin == 1] - - # Universal functions with two input arguments - ufuncs_str_two_nin = [ufunc for ufunc in ufuncs_str if getattr(np, ufunc).nin == 2] + # Universal functions with one input argument and one output, corresponding to (in NumPy 1.22.4): + # ['absolute', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'cbrt', 'ceil', 'conj', 'conjugate', + # 'cos', 'cosh', 'deg2rad', 'degrees', 'exp', 'exp2', 'expm1', 'fabs', 'floor', 'invert', 'isfinite', 'isinf', + # 'isnan', 'isnat', 'log', 'log10', 'log1p', 'log2', 'logical_not', 'negative', 'positive', 'rad2deg', 'radians', + # 'reciprocal', 'rint', 'sign', 'signbit', 'sin', 'sinh', 'spacing', 'sqrt', 'square', 'tan', 'tanh', 'trunc'] + ufuncs_str_1nin_1nout = [ufunc for ufunc in ufuncs_str + if (getattr(np, ufunc).nin == 1 and getattr(np, ufunc).nout == 1)] + + # Universal functions with one input argument and two output (Note: none exist for three outputs or above) + # Those correspond to: ['frexp', 'modf'] + ufuncs_str_1nin_2nout = [ufunc for ufunc in ufuncs_str + if (getattr(np, ufunc).nin == 1 and getattr(np, ufunc).nout == 2)] + + # Universal functions with two input arguments and one output, corresponding to: + # ['add', 'arctan2', 'bitwise_and', 'bitwise_or', 'bitwise_xor', 'copysign', 'divide', 'equal', 'floor_divide', + # 'float_power', 'fmax', 'fmin', 'fmod', 'gcd', 'greater', 'greater_equal', 'heaviside', 'hypot', 'lcm', 'ldexp', + # 'left_shift', 'less', 'less_equal', 'logaddexp', 'logaddexp2', 'logical_and', 'logical_or', 'logical_xor', + # 'maximum', 'minimum', 'mod', 'multiply', 'nextafter', 'not_equal', 'power', 'remainder', 'right_shift', + # 'subtract', 'true_divide'] + ufuncs_str_2nin_1nout = [ufunc for ufunc in ufuncs_str + if (getattr(np, ufunc).nin == 2 and getattr(np, ufunc).nout == 1)] + + # Universal functions with two input arguments and two outputs (Note: none exist for three outputs or above) + # These correspond to: ['divmod'] + ufuncs_str_2nin_2nout = [ufunc for ufunc in ufuncs_str + if (getattr(np, ufunc).nin == 2 and getattr(np, ufunc).nout == 2)] # We create two random array of varying dtype width = height = 5 @@ -1938,12 +1959,12 @@ class TestArrayInterface: arr1 = np.random.randint(min_val, max_val, (height, width), dtype="int32") + np.random.normal(size=(height, width)) arr2 = np.random.randint(min_val, max_val, (height, width), dtype="int32") + np.random.normal(size=(height, width)) - @pytest.mark.parametrize("ufunc_str", ufuncs_str_one_nin) + @pytest.mark.parametrize("ufunc_str", ufuncs_str_1nin_1nout + ufuncs_str_1nin_2nout) @pytest.mark.parametrize("dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64", "longdouble"]) @pytest.mark.parametrize("nodata_init", [None, "type_default"]) - def test_array_ufunc_one_nin(self, ufunc_str: str, nodata_init: None | str, dtype: str): - """Test that ufuncs with a single input argument consistently return the same result as for masked arrays.""" + def test_array_ufunc_1nin_1nout(self, ufunc_str: str, nodata_init: None | str, dtype: str): + """Test that ufuncs with one input and one output consistently return the same result as for masked arrays.""" # We set the default nodata if nodata_init == "type_default": @@ -1957,26 +1978,44 @@ def test_array_ufunc_one_nin(self, ufunc_str: str, nodata_init: None | str, dtyp # Get ufunc ufunc = getattr(np, ufunc_str) - # Check if our input dtype is possible on this ufunc, if yes check that outputs are identical - if dtype in [str(np.dtype(t[0])) for t in ufunc.types]: - assert np.ma.allequal(ufunc(rst.data), ufunc(rst).data) - # If the input dtype is not possible, check that NumPy raises a TypeError - else: - with pytest.raises(TypeError): - ufunc(rst.data) - with pytest.raises(TypeError): - ufunc(rst) + # Find the common dtype between the Raster and the most constrained input type (first character is the input) + com_dtype = np.find_common_type([dtype] + [ufunc.types[0][0]], []) + + # Catch warnings + with warnings.catch_warnings(): + + warnings.filterwarnings("ignore", category=RuntimeWarning) + warnings.filterwarnings("ignore", category=UserWarning) + + # Check if our input dtype is possible on this ufunc, if yes check that outputs are identical + if com_dtype in [str(np.dtype(t[0])) for t in ufunc.types]: + + # For a single output + if ufunc.nout == 1: + assert np.ma.allequal(ufunc(rst.data), ufunc(rst).data) + + # For two outputs + elif ufunc.nout == 2: + assert (np.ma.allequal(ufunc(rst.data)[0], ufunc(rst)[0].data) + and np.ma.allequal(ufunc(rst.data)[1], ufunc(rst)[1].data)) + + # If the input dtype is not possible, check that NumPy raises a TypeError + else: + with pytest.raises(TypeError): + ufunc(rst.data) + with pytest.raises(TypeError): + ufunc(rst) - @pytest.mark.parametrize("ufunc_str", ufuncs_str_two_nin) + @pytest.mark.parametrize("ufunc_str", ufuncs_str_2nin_1nout + ufuncs_str_2nin_2nout) @pytest.mark.parametrize("dtype1", ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64", "longdouble"]) @pytest.mark.parametrize("dtype2", ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64", "longdouble"]) @pytest.mark.parametrize("nodata1_init", [None, "type_default"]) @pytest.mark.parametrize("nodata2_init", [None, "type_default"]) - def test_array_ufunc_two_nin(self, ufunc_str: str, nodata1_init: None | str, nodata2_init: str, dtype1: str, dtype2: str): - """Test that ufuncs with a single input argument consistently return the same result as for masked arrays.""" + def test_array_ufunc_2nin_1nout(self, ufunc_str: str, nodata1_init: None | str, nodata2_init: str, dtype1: str, dtype2: str): + """Test that ufuncs with two input arguments consistently return the same result as for masked arrays.""" # We set the default nodatas if nodata1_init == "type_default": @@ -1993,18 +2032,51 @@ def test_array_ufunc_two_nin(self, ufunc_str: str, nodata1_init: None | str, nod ufunc = getattr(np, ufunc_str) - # Check if both our input dtypes are possible on this ufunc, if yes check that outputs are identical - if (dtype1, dtype2) in [(str(np.dtype(t[0])), str(np.dtype(t[1]))) for t in ufunc.types]: - assert np.ma.allequal(ufunc(rst1.data, rst2.data, casting="unsafe"), ufunc(rst1, rst2, casting="unsafe").data) + # Find the common dtype between the Raster and the most constrained input type (first character is the input) + com_dtype1 = np.find_common_type([dtype1] + [ufunc.types[0][0]], []) + com_dtype2 = np.find_common_type([dtype2] + [ufunc.types[0][1]], []) + + # If the two input types can be the same type, pass a tuple with the common type of both + # Below we ignore datetime and timedelta types "m" and "M" + if all([t[0]==t[1] for t in ufunc.types if not ("m" or "M") in t[0:2]]): + com_dtype_both = np.find_common_type([com_dtype1, com_dtype2], []) + com_dtype_tuple = (com_dtype_both, com_dtype_both) + + # Otherwise, pass the tuple with each common type else: - print('Ufunc '+ufunc_str+' should fail with dtypes '+str(dtype1)+" and "+str(dtype2)) + com_dtype_tuple = (com_dtype1, com_dtype2) - with pytest.raises(TypeError): - ufunc(rst1.data, rst2.data, casting="unsafe") - with pytest.raises(TypeError): - ufunc(rst1, rst2, casting="unsafe") + # Catch warnings + with warnings.catch_warnings(): + + warnings.filterwarnings("ignore", category=RuntimeWarning) + warnings.filterwarnings("ignore", category=UserWarning) + # Check if both our input dtypes are possible on this ufunc, if yes check that outputs are identical + if com_dtype_tuple in [(str(np.dtype(t[0])), str(np.dtype(t[1]))) for t in ufunc.types]: + # For a single output + if ufunc.nout == 1: + # There exists a single exception due to negative integers as exponent of integers in "power" + if ufunc_str == 'power' and 'int' in dtype1 and 'int' in dtype2 and np.min(rst2.data) < 0: + with pytest.raises(ValueError, match='Integers to negative integer powers are not allowed.'): + ufunc(rst1, rst2) + with pytest.raises(ValueError, match='Integers to negative integer powers are not allowed.'): + ufunc(rst1.data, rst2.data) + # Otherwise, run the normal assertion for a single output + else: + assert np.ma.allequal(ufunc(rst1.data, rst2.data), ufunc(rst1, rst2).data) + # For two outputs + elif ufunc.nout == 2: + assert (np.ma.allequal(ufunc(rst1.data, rst2.data)[0], ufunc(rst1, rst2)[0].data) and + np.ma.allequal(ufunc(rst1.data, rst2.data)[1], ufunc(rst1, rst2)[1].data)) + + # If the input dtype is not possible, check that NumPy raises a TypeError + else: + with pytest.raises(TypeError): + ufunc(rst1.data, rst2.data) + with pytest.raises(TypeError): + ufunc(rst1, rst2) \ No newline at end of file From 4984d56da0e65ad9f8c55a9d49ff24f8452d1e77 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Wed, 7 Sep 2022 15:02:32 +0200 Subject: [PATCH 07/18] Add a mask to __array_ufunc__ tests --- tests/test_georaster.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/test_georaster.py b/tests/test_georaster.py index 51622541..823ac623 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -1959,6 +1959,14 @@ class TestArrayInterface: arr1 = np.random.randint(min_val, max_val, (height, width), dtype="int32") + np.random.normal(size=(height, width)) arr2 = np.random.randint(min_val, max_val, (height, width), dtype="int32") + np.random.normal(size=(height, width)) + # Create two random masks + mask1 = np.random.randint(0, 2, size=(width, height), dtype=bool) + mask2 = np.random.randint(0, 2, size=(width, height), dtype=bool) + + # Assert that there is at least one unmasked value + assert np.count_nonzero(~mask1) > 0 + assert np.count_nonzero(~mask2) > 0 + @pytest.mark.parametrize("ufunc_str", ufuncs_str_1nin_1nout + ufuncs_str_1nin_2nout) @pytest.mark.parametrize("dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64", "longdouble"]) @@ -1973,7 +1981,8 @@ def test_array_ufunc_1nin_1nout(self, ufunc_str: str, nodata_init: None | str, d nodata = None # Create Raster - rst = gr.Raster.from_array(self.arr1.astype(dtype), transform=self.transform, crs=None, nodata=nodata) + ma1 = np.ma.masked_array(data = self.arr1.astype(dtype), mask=self.mask1) + rst = gr.Raster.from_array(ma1, transform=self.transform, crs=None, nodata=nodata) # Get ufunc ufunc = getattr(np, ufunc_str) @@ -2027,8 +2036,10 @@ def test_array_ufunc_2nin_1nout(self, ufunc_str: str, nodata1_init: None | str, else: nodata2 = None - rst1 = gr.Raster.from_array(self.arr1.astype(dtype1), transform=self.transform, crs=None, nodata=nodata1) - rst2 = gr.Raster.from_array(self.arr2.astype(dtype2), transform=self.transform, crs=None, nodata=nodata2) + ma1 = np.ma.masked_array(data = self.arr1.astype(dtype1), mask=self.mask1) + ma2 = np.ma.masked_array(data = self.arr2.astype(dtype2), mask=self.mask2) + rst1 = gr.Raster.from_array(ma1, transform=self.transform, crs=None, nodata=nodata1) + rst2 = gr.Raster.from_array(ma2, transform=self.transform, crs=None, nodata=nodata2) ufunc = getattr(np, ufunc_str) From 587e571d85e6ccb2522adc01f28c03074b45111b Mon Sep 17 00:00:00 2001 From: rhugonne Date: Wed, 7 Sep 2022 18:14:40 +0200 Subject: [PATCH 08/18] Increment commit on array functions --- geoutils/georaster/raster.py | 36 ++++++------ tests/test_georaster.py | 103 ++++++++++++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 19 deletions(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index a9b56d58..2cf3f7e5 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -960,24 +960,20 @@ def copy(self: RasterType, new_array: np.ndarray | None = None) -> RasterType: return cp - # Not sure yet how this behaves with __array__, __array_ufunc__ and __array_function__ implemented - # @property - # def __array_interface__(self) -> dict[str, Any]: - # if not self.is_loaded: - # self.load()__array - # - # return self._data.__array_interface__ # type: ignore - - # Set up array interface for our class container - # Inspired from https://numpy.org/doc/stable/user/basics.dispatch.html#basics-dispatch + def __array__(self): + """Method to cast np.array() or np.asarray() function directly on Raster classes.""" return self._data def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> RasterType | tuple[RasterType, RasterType]: - - if not self.is_loaded: - self.load() + """ + Method to cast NumPy universal functions directly on Raster classes, by passing to the masked array. + This function basically applies the ufunc (with its method and kwargs) to .data, and rebuilds the Raster from + self.__class__. The cases separate the number of input nin and output nout, to properly feed .data and return + Raster objects. + See more details in NumPy doc, e.g., https://numpy.org/doc/stable/user/basics.dispatch.html#basics-dispatch. + """ # If the universal function takes only one input if ufunc.nin == 1: @@ -1020,12 +1016,16 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> RasterType | tupl "crs": self.crs, "nodata": self.nodata}) - def __array_function__(self, func, types, args, kwargs): - - if not self.is_loaded: - self.load() + def __array_function__(self, func, types, args, kwargs) -> Any: + """ + Method to cast NumPy array function directly on a Raster object by applying it to the masked array. + A limited number of function is supported, listed in XXX. + """ + # For subclassing + if not all(issubclass(t, self.__class__) for t in types): + return NotImplemented - return func(self._data, *kwargs) # type: ignore + return func(args[0].data, *args[1:], **kwargs) # Note the star is needed because of the default argument 'mode' preceding non default arg 'inplace' # Then the final overload must be duplicated diff --git a/tests/test_georaster.py b/tests/test_georaster.py index 823ac623..a85cd729 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -1918,6 +1918,8 @@ def test_power(self, power: float | int) -> None: class TestArrayInterface: """Test that the array interface of Raster works as expected for ufuncs and array functions""" + # -- First, we list all universal NumPy functions, or "ufuncs" -- + # All universal functions of NumPy, about 90 in 2022. See list: https://numpy.org/doc/stable/reference/ufuncs.html ufuncs_str = [ufunc for ufunc in np.core.umath.__all__ if (ufunc[0] != '_' and ufunc.islower() @@ -1950,6 +1952,32 @@ class TestArrayInterface: ufuncs_str_2nin_2nout = [ufunc for ufunc in ufuncs_str if (getattr(np, ufunc).nin == 2 and getattr(np, ufunc).nout == 2)] + # -- Second, we list array functions we intend to support in the array interface -- + + # To my knowledge, there is no list that includes all numpy functions (and we probably don't want to test them all) + # Let's include manually the important ones: + # - statistics: normal, for NaNs, and for masked_arrays; + # - sorting and counting; + # Most other math functions are already universal functions + + # NaN functions: [f for f in np.lib.nanfunctions.__all__] + nanstatfuncs = ['nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', 'nanmedian', 'nanpercentile', + 'nanvar', 'nanstd', 'nanprod', 'nancumsum', 'nancumprod', 'nanquantile'] + + # Statistics and sorting matching NaN functions: https://numpy.org/doc/stable/reference/routines.statistics.html + # and https://numpy.org/doc/stable/reference/routines.sort.html + statfuncs = ['sum', 'max', 'min', 'argmax', 'argmin', 'mean', 'median', 'percentile', 'var', 'std', 'prod', + 'cumsum', 'cumprod', 'quantile'] + + # Sorting and counting ounting with single array input: + sortfuncs = ['sort', 'count_nonzero'] + + # Masked array functions that match the naming of normal functions + mastatsfuncs = statfuncs + + + # -- Third, we define the test data -- + # We create two random array of varying dtype width = height = 5 min_val = np.iinfo("int32").min @@ -2090,4 +2118,77 @@ def test_array_ufunc_2nin_1nout(self, ufunc_str: str, nodata1_init: None | str, with pytest.raises(TypeError): ufunc(rst1.data, rst2.data) with pytest.raises(TypeError): - ufunc(rst1, rst2) \ No newline at end of file + ufunc(rst1, rst2) + + @pytest.mark.parametrize("arrfunc_str", nanstatfuncs + statfuncs + sortfuncs) + @pytest.mark.parametrize("dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", + "float32", "float64", "longdouble"]) + @pytest.mark.parametrize("nodata_init", [None, "type_default"]) + def test_array_functions(self, arrfunc_str, dtype, nodata_init): + """Test that array functions that we support give the same output as they would on the masked array""" + + # We set the default nodata + if nodata_init == "type_default": + nodata: int | None = _default_ndv(dtype) + else: + nodata = None + + # Create Raster + ma1 = np.ma.masked_array(data=self.arr1.astype(dtype), mask=self.mask1) + rst = gr.Raster.from_array(ma1, transform=self.transform, crs=None, nodata=nodata) + + # Get array func + arrfunc = getattr(np, arrfunc_str) + + # Find the common dtype between the Raster and the most constrained input type (first character is the input) + # com_dtype = np.find_common_type([dtype] + [arrfunc.types[0][0]], []) + + # Catch warnings + # with warnings.catch_warnings(): + + # warnings.filterwarnings("ignore", category=RuntimeWarning) + # warnings.filterwarnings("ignore", category=UserWarning) + + # Check if our input dtype is possible on this array function, if yes check that outputs are identical + # if com_dtype in [str(np.dtype(t[0])) for t in ufunc.types]: + + # Pass an argument for functions that require it (nanpercentile, percentile, quantile and nanquantile) + if 'percentile' in arrfunc_str: + arg = 80 + output_rst = arrfunc(rst, arg) + output_ma = arrfunc(rst.data, arg) + elif 'quantile' in arrfunc_str: + arg = 0.8 + output_rst = arrfunc(rst, arg) + output_ma = arrfunc(rst.data, arg) + else: + output_rst = arrfunc(rst) + output_ma = arrfunc(rst.data) + + if isinstance(output_rst, np.ndarray): + assert np.ma.allequal(output_rst, output_ma) + else: + assert output_rst == output_ma + + # If the array function also exists for masked array, run a test from np.ma as well + if arrfunc_str in self.mastatsfuncs: + + mafunc = getattr(np.ma, arrfunc_str) + + if 'percentile' in arrfunc_str: + arg = 80 + output_rst = mafunc(rst, arg) + output_ma = mafunc(rst.data, arg) + elif 'quantile' in arrfunc_str: + arg = 0.8 + output_rst = mafunc(rst, arg) + output_ma = mafunc(rst.data, arg) + else: + output_rst = mafunc(rst) + output_ma = mafunc(rst.data) + + if isinstance(output_rst, np.ndarray): + assert np.ma.allequal(output_rst, output_ma) + else: + assert output_rst == output_ma + From 3a7db5882bb4b8aae5f6cdbafecc601423e9c227 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Wed, 7 Sep 2022 23:47:51 +0200 Subject: [PATCH 09/18] Finalize array functions and linting --- geoutils/georaster/raster.py | 138 +++++++++++++++++++------- tests/test_georaster.py | 184 +++++++++++++++++------------------ 2 files changed, 193 insertions(+), 129 deletions(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index 2cf3f7e5..e0bc8be6 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -48,6 +48,43 @@ RasterType = TypeVar("RasterType", bound="Raster") +# List of numpy functions that are handled: nan statistics function, normal statistics function and sorting/counting +_HANDLED_FUNCTIONS = ( + [ + "nansum", + "nanmax", + "nanmin", + "nanargmax", + "nanargmin", + "nanmean", + "nanmedian", + "nanpercentile", + "nanvar", + "nanstd", + "nanprod", + "nancumsum", + "nancumprod", + "nanquantile", + ] + + [ + "sum", + "amax", + "amin", + "argmax", + "argmin", + "mean", + "median", + "percentile", + "var", + "std", + "prod", + "cumsum", + "cumprod", + "quantile", + ] + + ["sort", "count_nonzero", "unique"] +) + # Function to set the default nodata values for any given dtype # Similar to GDAL for int types, but without absurdly long nodata values for floats. @@ -68,7 +105,7 @@ def _default_ndv(dtype: str | np.dtype | type) -> int: "float32": -99999, "float64": -99999, "float128": -99999, - "longdouble": -99999, # This is float64 on Windows, float128 on other systems, for compatibility + "longdouble": -99999, # This is float64 on Windows, float128 on other systems, for compatibility } # Check argument dtype is as expected if not isinstance(dtype, (str, np.dtype, type)): @@ -960,13 +997,18 @@ def copy(self: RasterType, new_array: np.ndarray | None = None) -> RasterType: return cp - - def __array__(self): + def __array__(self) -> np.ndarray: """Method to cast np.array() or np.asarray() function directly on Raster classes.""" return self._data - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> RasterType | tuple[RasterType, RasterType]: + def __array_ufunc__( + self, + ufunc: Callable[[np.ndarray | tuple[np.ndarray, np.ndarray]], np.ndarray | tuple[np.ndarray, np.ndarray]], + method: str, + *inputs: Raster | tuple[Raster, Raster], + **kwargs: Any, + ) -> Raster | tuple[Raster, Raster]: """ Method to cast NumPy universal functions directly on Raster classes, by passing to the masked array. This function basically applies the ufunc (with its method and kwargs) to .data, and rebuilds the Raster from @@ -979,53 +1021,77 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs) -> RasterType | tupl if ufunc.nin == 1: # If the universal function has only one output if ufunc.nout == 1: - return self.__class__({"data": getattr(ufunc, method)(inputs[0].data, **kwargs), - "transform": self.transform, - "crs": self.crs, - "nodata": self.nodata}) + return self.__class__( + { + "data": getattr(ufunc, method)(inputs[0].data, **kwargs), # type: ignore + "transform": self.transform, + "crs": self.crs, + "nodata": self.nodata, + } + ) # If the universal function has two outputs (Note: no ufunc exists that has three outputs or more) - elif ufunc.nout ==2: - output = getattr(ufunc, method)(inputs[0].data, **kwargs) - return self.__class__({"data": output[0], - "transform": self.transform, - "crs": self.crs, - "nodata": self.nodata}), \ - self.__class__({"data": output[1], - "transform": self.transform, - "crs": self.crs, - "nodata": self.nodata}) + else: + output = getattr(ufunc, method)(inputs[0].data, **kwargs) # type: ignore + return self.__class__( + {"data": output[0], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} + ), self.__class__( + {"data": output[1], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} + ) # If the universal function takes two inputs (Note: no ufunc exists that has three inputs or more) - elif ufunc.nin == 2: + else: if ufunc.nout == 1: - return self.__class__({"data": getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), - "transform": self.transform, - "crs": self.crs, - "nodata": self.nodata}) + return self.__class__( + { + "data": getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), # type: ignore + "transform": self.transform, + "crs": self.crs, + "nodata": self.nodata, + } + ) # If the universal function has two outputs (Note: no ufunc exists that has three outputs or more) - elif ufunc.nout == 2: - output = getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs) - return self.__class__({"data": output[0], - "transform": self.transform, - "crs": self.crs, - "nodata": self.nodata}),\ - self.__class__({"data": output[1], - "transform": self.transform, - "crs": self.crs, - "nodata": self.nodata}) - - def __array_function__(self, func, types, args, kwargs) -> Any: + else: + output = getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs) # type: ignore + return self.__class__( + {"data": output[0], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} + ), self.__class__( + {"data": output[1], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} + ) + + def __array_function__(self, func: Callable[[np.ndarray, Any], Any], types: type, args: Any, kwargs: Any) -> Any: """ Method to cast NumPy array function directly on a Raster object by applying it to the masked array. A limited number of function is supported, listed in XXX. """ + + # If function is not implemented + if func.__name__ not in _HANDLED_FUNCTIONS: + return NotImplemented + # For subclassing if not all(issubclass(t, self.__class__) for t in types): return NotImplemented - return func(args[0].data, *args[1:], **kwargs) + # We now choose the behaviour of array functions + # For median, np.median ignores masks of masked array, so we force np.ma.median + if func.__name__ in ["median", "nanmedian"]: + func = np.ma.median + first_arg = args[0].data + + # For percentiles and quantiles, there exist no masked array version, so we compute on the valid data directly + elif func.__name__ in ["percentile", "nanpercentile"]: + first_arg = args[0].data.data[~args[0].data.mask] + + elif func.__name__ in ["quantile", "nanquantile"]: + first_arg = args[0].data.data[~args[0].data.mask] + + # Otherwise, we run the numpy function normally (most take masks into account) + else: + first_arg = args[0].data + + return func(first_arg, *args[1:], **kwargs) # type: ignore # Note the star is needed because of the default argument 'mode' preceding non default arg 'inplace' # Then the final overload must be duplicated diff --git a/tests/test_georaster.py b/tests/test_georaster.py index a85cd729..74ea09b1 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -21,7 +21,8 @@ import geoutils.misc import geoutils.projtools as pt from geoutils import examples -from geoutils.georaster.raster import _default_ndv, _default_rio_attrs +from geoutils.georaster.raster import (_HANDLED_FUNCTIONS, _default_ndv, + _default_rio_attrs) from geoutils.misc import resampling_method_from_str DO_PLOT = False @@ -1439,7 +1440,7 @@ def test_split_bands(self) -> None: red_c = img.split_bands(copy=True, subset=0)[0] # Check that the red band data does not share memory with the rgb image (it's a copy) - assert not np.shares_memory(red_c, img) + assert not np.shares_memory(red_c.data, img.data) # Modify the copy, and make sure the original data is not modified. red_c.data += 1 @@ -1921,22 +1922,31 @@ class TestArrayInterface: # -- First, we list all universal NumPy functions, or "ufuncs" -- # All universal functions of NumPy, about 90 in 2022. See list: https://numpy.org/doc/stable/reference/ufuncs.html - ufuncs_str = [ufunc for ufunc in np.core.umath.__all__ - if (ufunc[0] != '_' and ufunc.islower() - and 'err' not in ufunc and ufunc not in ['e', 'pi', 'frompyfunc', 'euler_gamma'])] + ufuncs_str = [ + ufunc + for ufunc in np.core.umath.__all__ + if ( + ufunc[0] != "_" + and ufunc.islower() + and "err" not in ufunc + and ufunc not in ["e", "pi", "frompyfunc", "euler_gamma"] + ) + ] # Universal functions with one input argument and one output, corresponding to (in NumPy 1.22.4): # ['absolute', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'cbrt', 'ceil', 'conj', 'conjugate', # 'cos', 'cosh', 'deg2rad', 'degrees', 'exp', 'exp2', 'expm1', 'fabs', 'floor', 'invert', 'isfinite', 'isinf', # 'isnan', 'isnat', 'log', 'log10', 'log1p', 'log2', 'logical_not', 'negative', 'positive', 'rad2deg', 'radians', # 'reciprocal', 'rint', 'sign', 'signbit', 'sin', 'sinh', 'spacing', 'sqrt', 'square', 'tan', 'tanh', 'trunc'] - ufuncs_str_1nin_1nout = [ufunc for ufunc in ufuncs_str - if (getattr(np, ufunc).nin == 1 and getattr(np, ufunc).nout == 1)] + ufuncs_str_1nin_1nout = [ + ufunc for ufunc in ufuncs_str if (getattr(np, ufunc).nin == 1 and getattr(np, ufunc).nout == 1) + ] # Universal functions with one input argument and two output (Note: none exist for three outputs or above) # Those correspond to: ['frexp', 'modf'] - ufuncs_str_1nin_2nout = [ufunc for ufunc in ufuncs_str - if (getattr(np, ufunc).nin == 1 and getattr(np, ufunc).nout == 2)] + ufuncs_str_1nin_2nout = [ + ufunc for ufunc in ufuncs_str if (getattr(np, ufunc).nin == 1 and getattr(np, ufunc).nout == 2) + ] # Universal functions with two input arguments and one output, corresponding to: # ['add', 'arctan2', 'bitwise_and', 'bitwise_or', 'bitwise_xor', 'copysign', 'divide', 'equal', 'floor_divide', @@ -1944,13 +1954,15 @@ class TestArrayInterface: # 'left_shift', 'less', 'less_equal', 'logaddexp', 'logaddexp2', 'logical_and', 'logical_or', 'logical_xor', # 'maximum', 'minimum', 'mod', 'multiply', 'nextafter', 'not_equal', 'power', 'remainder', 'right_shift', # 'subtract', 'true_divide'] - ufuncs_str_2nin_1nout = [ufunc for ufunc in ufuncs_str - if (getattr(np, ufunc).nin == 2 and getattr(np, ufunc).nout == 1)] + ufuncs_str_2nin_1nout = [ + ufunc for ufunc in ufuncs_str if (getattr(np, ufunc).nin == 2 and getattr(np, ufunc).nout == 1) + ] # Universal functions with two input arguments and two outputs (Note: none exist for three outputs or above) # These correspond to: ['divmod'] - ufuncs_str_2nin_2nout = [ufunc for ufunc in ufuncs_str - if (getattr(np, ufunc).nin == 2 and getattr(np, ufunc).nout == 2)] + ufuncs_str_2nin_2nout = [ + ufunc for ufunc in ufuncs_str if (getattr(np, ufunc).nin == 2 and getattr(np, ufunc).nout == 2) + ] # -- Second, we list array functions we intend to support in the array interface -- @@ -1960,21 +1972,21 @@ class TestArrayInterface: # - sorting and counting; # Most other math functions are already universal functions + # The full list exists in Raster + handled_functions = _HANDLED_FUNCTIONS + + # Details below: # NaN functions: [f for f in np.lib.nanfunctions.__all__] - nanstatfuncs = ['nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', 'nanmedian', 'nanpercentile', - 'nanvar', 'nanstd', 'nanprod', 'nancumsum', 'nancumprod', 'nanquantile'] + # nanstatfuncs = ['nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', 'nanmedian', 'nanpercentile', + # 'nanvar', 'nanstd', 'nanprod', 'nancumsum', 'nancumprod', 'nanquantile'] # Statistics and sorting matching NaN functions: https://numpy.org/doc/stable/reference/routines.statistics.html # and https://numpy.org/doc/stable/reference/routines.sort.html - statfuncs = ['sum', 'max', 'min', 'argmax', 'argmin', 'mean', 'median', 'percentile', 'var', 'std', 'prod', - 'cumsum', 'cumprod', 'quantile'] + # statfuncs = ['sum', 'max', 'min', 'argmax', 'argmin', 'mean', 'median', 'percentile', 'var', 'std', 'prod', + # 'cumsum', 'cumprod', 'quantile'] # Sorting and counting ounting with single array input: - sortfuncs = ['sort', 'count_nonzero'] - - # Masked array functions that match the naming of normal functions - mastatsfuncs = statfuncs - + # sortfuncs = ['sort', 'count_nonzero', 'unique] # -- Third, we define the test data -- @@ -1995,11 +2007,12 @@ class TestArrayInterface: assert np.count_nonzero(~mask1) > 0 assert np.count_nonzero(~mask2) > 0 - @pytest.mark.parametrize("ufunc_str", ufuncs_str_1nin_1nout + ufuncs_str_1nin_2nout) - @pytest.mark.parametrize("dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", - "float32", "float64", "longdouble"]) - @pytest.mark.parametrize("nodata_init", [None, "type_default"]) - def test_array_ufunc_1nin_1nout(self, ufunc_str: str, nodata_init: None | str, dtype: str): + @pytest.mark.parametrize("ufunc_str", ufuncs_str_1nin_1nout + ufuncs_str_1nin_2nout) # type: ignore + @pytest.mark.parametrize( + "dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64", "longdouble"] + ) # type: ignore + @pytest.mark.parametrize("nodata_init", [None, "type_default"]) # type: ignore + def test_array_ufunc_1nin_1nout(self, ufunc_str: str, nodata_init: None | str, dtype: str) -> None: """Test that ufuncs with one input and one output consistently return the same result as for masked arrays.""" # We set the default nodata @@ -2009,7 +2022,7 @@ def test_array_ufunc_1nin_1nout(self, ufunc_str: str, nodata_init: None | str, d nodata = None # Create Raster - ma1 = np.ma.masked_array(data = self.arr1.astype(dtype), mask=self.mask1) + ma1 = np.ma.masked_array(data=self.arr1.astype(dtype), mask=self.mask1) rst = gr.Raster.from_array(ma1, transform=self.transform, crs=None, nodata=nodata) # Get ufunc @@ -2025,7 +2038,7 @@ def test_array_ufunc_1nin_1nout(self, ufunc_str: str, nodata_init: None | str, d warnings.filterwarnings("ignore", category=UserWarning) # Check if our input dtype is possible on this ufunc, if yes check that outputs are identical - if com_dtype in [str(np.dtype(t[0])) for t in ufunc.types]: + if com_dtype in (str(np.dtype(t[0])) for t in ufunc.types): # For a single output if ufunc.nout == 1: @@ -2033,8 +2046,9 @@ def test_array_ufunc_1nin_1nout(self, ufunc_str: str, nodata_init: None | str, d # For two outputs elif ufunc.nout == 2: - assert (np.ma.allequal(ufunc(rst.data)[0], ufunc(rst)[0].data) - and np.ma.allequal(ufunc(rst.data)[1], ufunc(rst)[1].data)) + assert np.ma.allequal(ufunc(rst.data)[0], ufunc(rst)[0].data) and np.ma.allequal( + ufunc(rst.data)[1], ufunc(rst)[1].data + ) # If the input dtype is not possible, check that NumPy raises a TypeError else: @@ -2043,15 +2057,18 @@ def test_array_ufunc_1nin_1nout(self, ufunc_str: str, nodata_init: None | str, d with pytest.raises(TypeError): ufunc(rst) - - @pytest.mark.parametrize("ufunc_str", ufuncs_str_2nin_1nout + ufuncs_str_2nin_2nout) - @pytest.mark.parametrize("dtype1", ["uint8", "int8", "uint16", "int16", "uint32", "int32", - "float32", "float64", "longdouble"]) - @pytest.mark.parametrize("dtype2", ["uint8", "int8", "uint16", "int16", "uint32", "int32", - "float32", "float64", "longdouble"]) - @pytest.mark.parametrize("nodata1_init", [None, "type_default"]) - @pytest.mark.parametrize("nodata2_init", [None, "type_default"]) - def test_array_ufunc_2nin_1nout(self, ufunc_str: str, nodata1_init: None | str, nodata2_init: str, dtype1: str, dtype2: str): + @pytest.mark.parametrize("ufunc_str", ufuncs_str_2nin_1nout + ufuncs_str_2nin_2nout) # type: ignore + @pytest.mark.parametrize( + "dtype1", ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64", "longdouble"] + ) # type: ignore + @pytest.mark.parametrize( + "dtype2", ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64", "longdouble"] + ) # type: ignore + @pytest.mark.parametrize("nodata1_init", [None, "type_default"]) # type: ignore + @pytest.mark.parametrize("nodata2_init", [None, "type_default"]) # type: ignore + def test_array_ufunc_2nin_1nout( + self, ufunc_str: str, nodata1_init: None | str, nodata2_init: str, dtype1: str, dtype2: str + ) -> None: """Test that ufuncs with two input arguments consistently return the same result as for masked arrays.""" # We set the default nodatas @@ -2064,8 +2081,8 @@ def test_array_ufunc_2nin_1nout(self, ufunc_str: str, nodata1_init: None | str, else: nodata2 = None - ma1 = np.ma.masked_array(data = self.arr1.astype(dtype1), mask=self.mask1) - ma2 = np.ma.masked_array(data = self.arr2.astype(dtype2), mask=self.mask2) + ma1 = np.ma.masked_array(data=self.arr1.astype(dtype1), mask=self.mask1) + ma2 = np.ma.masked_array(data=self.arr2.astype(dtype2), mask=self.mask2) rst1 = gr.Raster.from_array(ma1, transform=self.transform, crs=None, nodata=nodata1) rst2 = gr.Raster.from_array(ma2, transform=self.transform, crs=None, nodata=nodata2) @@ -2077,7 +2094,7 @@ def test_array_ufunc_2nin_1nout(self, ufunc_str: str, nodata1_init: None | str, # If the two input types can be the same type, pass a tuple with the common type of both # Below we ignore datetime and timedelta types "m" and "M" - if all([t[0]==t[1] for t in ufunc.types if not ("m" or "M") in t[0:2]]): + if all(t[0] == t[1] for t in ufunc.types if not ("m" or "M") in t[0:2]): com_dtype_both = np.find_common_type([com_dtype1, com_dtype2], []) com_dtype_tuple = (com_dtype_both, com_dtype_both) @@ -2092,16 +2109,16 @@ def test_array_ufunc_2nin_1nout(self, ufunc_str: str, nodata1_init: None | str, warnings.filterwarnings("ignore", category=UserWarning) # Check if both our input dtypes are possible on this ufunc, if yes check that outputs are identical - if com_dtype_tuple in [(str(np.dtype(t[0])), str(np.dtype(t[1]))) for t in ufunc.types]: + if com_dtype_tuple in ((str(np.dtype(t[0])), str(np.dtype(t[1]))) for t in ufunc.types): # For a single output if ufunc.nout == 1: # There exists a single exception due to negative integers as exponent of integers in "power" - if ufunc_str == 'power' and 'int' in dtype1 and 'int' in dtype2 and np.min(rst2.data) < 0: - with pytest.raises(ValueError, match='Integers to negative integer powers are not allowed.'): + if ufunc_str == "power" and "int" in dtype1 and "int" in dtype2 and np.min(rst2.data) < 0: + with pytest.raises(ValueError, match="Integers to negative integer powers are not allowed."): ufunc(rst1, rst2) - with pytest.raises(ValueError, match='Integers to negative integer powers are not allowed.'): + with pytest.raises(ValueError, match="Integers to negative integer powers are not allowed."): ufunc(rst1.data, rst2.data) # Otherwise, run the normal assertion for a single output @@ -2110,8 +2127,9 @@ def test_array_ufunc_2nin_1nout(self, ufunc_str: str, nodata1_init: None | str, # For two outputs elif ufunc.nout == 2: - assert (np.ma.allequal(ufunc(rst1.data, rst2.data)[0], ufunc(rst1, rst2)[0].data) and - np.ma.allequal(ufunc(rst1.data, rst2.data)[1], ufunc(rst1, rst2)[1].data)) + assert np.ma.allequal(ufunc(rst1.data, rst2.data)[0], ufunc(rst1, rst2)[0].data) and np.ma.allequal( + ufunc(rst1.data, rst2.data)[1], ufunc(rst1, rst2)[1].data + ) # If the input dtype is not possible, check that NumPy raises a TypeError else: @@ -2120,11 +2138,12 @@ def test_array_ufunc_2nin_1nout(self, ufunc_str: str, nodata1_init: None | str, with pytest.raises(TypeError): ufunc(rst1, rst2) - @pytest.mark.parametrize("arrfunc_str", nanstatfuncs + statfuncs + sortfuncs) - @pytest.mark.parametrize("dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", - "float32", "float64", "longdouble"]) - @pytest.mark.parametrize("nodata_init", [None, "type_default"]) - def test_array_functions(self, arrfunc_str, dtype, nodata_init): + @pytest.mark.parametrize("arrfunc_str", handled_functions) # type: ignore + @pytest.mark.parametrize( + "dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64", "longdouble"] + ) # type: ignore + @pytest.mark.parametrize("nodata_init", [None, "type_default"]) # type: ignore + def test_array_functions(self, arrfunc_str: str, dtype: str, nodata_init: None | str) -> None: """Test that array functions that we support give the same output as they would on the masked array""" # We set the default nodata @@ -2144,51 +2163,30 @@ def test_array_functions(self, arrfunc_str, dtype, nodata_init): # com_dtype = np.find_common_type([dtype] + [arrfunc.types[0][0]], []) # Catch warnings - # with warnings.catch_warnings(): - - # warnings.filterwarnings("ignore", category=RuntimeWarning) - # warnings.filterwarnings("ignore", category=UserWarning) - - # Check if our input dtype is possible on this array function, if yes check that outputs are identical - # if com_dtype in [str(np.dtype(t[0])) for t in ufunc.types]: - - # Pass an argument for functions that require it (nanpercentile, percentile, quantile and nanquantile) - if 'percentile' in arrfunc_str: - arg = 80 - output_rst = arrfunc(rst, arg) - output_ma = arrfunc(rst.data, arg) - elif 'quantile' in arrfunc_str: - arg = 0.8 - output_rst = arrfunc(rst, arg) - output_ma = arrfunc(rst.data, arg) - else: - output_rst = arrfunc(rst) - output_ma = arrfunc(rst.data) - - if isinstance(output_rst, np.ndarray): - assert np.ma.allequal(output_rst, output_ma) - else: - assert output_rst == output_ma - - # If the array function also exists for masked array, run a test from np.ma as well - if arrfunc_str in self.mastatsfuncs: + with warnings.catch_warnings(): - mafunc = getattr(np.ma, arrfunc_str) + warnings.filterwarnings("ignore", category=RuntimeWarning) - if 'percentile' in arrfunc_str: - arg = 80 - output_rst = mafunc(rst, arg) - output_ma = mafunc(rst.data, arg) - elif 'quantile' in arrfunc_str: + # Pass an argument for functions that require it (nanpercentile, percentile, quantile and nanquantile) and + # define specific behaviour + if "percentile" in arrfunc_str: + arg = 80.0 + # For percentiles and quantiles, the statistic is computed after removing the masked values + output_rst = arrfunc(rst, arg) + output_ma = arrfunc(rst.data.data[~rst.data.mask], arg) + elif "quantile" in arrfunc_str: arg = 0.8 - output_rst = mafunc(rst, arg) - output_ma = mafunc(rst.data, arg) + output_rst = arrfunc(rst, arg) + output_ma = arrfunc(rst.data.data[~rst.data.mask], arg) + elif "median" in arrfunc_str: + # For the median, the statistic is computed by masking the values through np.ma.median + output_rst = arrfunc(rst) + output_ma = np.ma.median(rst.data) else: - output_rst = mafunc(rst) - output_ma = mafunc(rst.data) + output_rst = arrfunc(rst) + output_ma = arrfunc(rst.data) if isinstance(output_rst, np.ndarray): assert np.ma.allequal(output_rst, output_ma) else: assert output_rst == output_ma - From 7ed901c4b8c29791eaed595271a10d827f5ced2a Mon Sep 17 00:00:00 2001 From: rhugonne Date: Wed, 7 Sep 2022 23:48:39 +0200 Subject: [PATCH 10/18] Add regex ignore codespell on nin (which is a numpy variable, cannot be changed) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 982d3194..c87465bd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ repos: rev: v2.0.0 hooks: - id: codespell - args: [--ignore-words-list=alos] + args: [--ignore-words-list=alos, --ignore-regex=\bnin\b] types_or: [python, rst, markdown] files: ^(geoutils|doc|tests)/ From 4d84a72c6d32582913ea388959994c5c94a7b0e7 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Thu, 8 Sep 2022 10:16:42 +0200 Subject: [PATCH 11/18] Update descriptions --- geoutils/georaster/raster.py | 2 +- tests/test_georaster.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index e0bc8be6..dd1b524f 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -1063,7 +1063,7 @@ def __array_ufunc__( def __array_function__(self, func: Callable[[np.ndarray, Any], Any], types: type, args: Any, kwargs: Any) -> Any: """ Method to cast NumPy array function directly on a Raster object by applying it to the masked array. - A limited number of function is supported, listed in XXX. + A limited number of function is supported, listed in raster._HANDLED_FUNCTIONS. """ # If function is not implemented diff --git a/tests/test_georaster.py b/tests/test_georaster.py index 74ea09b1..6072f142 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -1968,7 +1968,7 @@ class TestArrayInterface: # To my knowledge, there is no list that includes all numpy functions (and we probably don't want to test them all) # Let's include manually the important ones: - # - statistics: normal, for NaNs, and for masked_arrays; + # - statistics: normal and for NaNs; # - sorting and counting; # Most other math functions are already universal functions From 79e3bf6223b13aff663cbe840d1ac7950c61bfe0 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Thu, 8 Sep 2022 10:19:28 +0200 Subject: [PATCH 12/18] Making black and isort happy simultaneously --- tests/test_georaster.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_georaster.py b/tests/test_georaster.py index 6072f142..cb549a84 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -21,8 +21,7 @@ import geoutils.misc import geoutils.projtools as pt from geoutils import examples -from geoutils.georaster.raster import (_HANDLED_FUNCTIONS, _default_ndv, - _default_rio_attrs) +from geoutils.georaster.raster import _default_ndv, _default_rio_attrs from geoutils.misc import resampling_method_from_str DO_PLOT = False @@ -1973,7 +1972,7 @@ class TestArrayInterface: # Most other math functions are already universal functions # The full list exists in Raster - handled_functions = _HANDLED_FUNCTIONS + handled_functions = gu.georaster.raster._HANDLED_FUNCTIONS # Details below: # NaN functions: [f for f in np.lib.nanfunctions.__all__] From 0a1b551916d153a461263733d5038440a029d61b Mon Sep 17 00:00:00 2001 From: rhugonne Date: Sun, 11 Sep 2022 11:01:05 +0200 Subject: [PATCH 13/18] Uncomment reflectivity test with np.ndarray and enable 2D broadcast for reflective arithmetic operations --- geoutils/georaster/raster.py | 9 ++++++-- tests/test_georaster.py | 41 ++++++++++++++++++++++++------------ 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index dd1b524f..c91c16e7 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -598,12 +598,17 @@ def _overloading_check( # Case 2 - other is a numpy array elif isinstance(other, np.ndarray): # Check that both array have the same shape - if self.data.shape == other.shape: + + if len(other.shape) == 2: + other_data = other[np.newaxis, :, :] + else: + other_data = other + + if self.data.shape == other_data.shape: pass else: raise ValueError("Both rasters must have the same shape.") - other_data = other ndv2 = None dtype2 = other_data.dtype diff --git a/tests/test_georaster.py b/tests/test_georaster.py index cb549a84..6fc67008 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -1828,18 +1828,21 @@ def test_ops_2args_implicit(self) -> None: """ warnings.filterwarnings("ignore", message="invalid value encountered") - # Test various inputs: Raster with different dtypes, np.ndarray, single number + # Test various inputs: Raster with different dtypes, np.ndarray with 2D or 3D shape, single number r1 = self.r1 r1_f32 = self.r1_f32 r2 = self.r2 - array = np.random.randint(1, 255, (1, self.height, self.width)).astype("uint8") + array_3d = np.random.randint(1, 255, (1, self.height, self.width)).astype("uint8") + array_2d = np.random.randint(1, 255, (self.height, self.width)).astype("uint8") floatval = 3.14 # Addition assert r1 + r2 == self.from_array(r1.data + r2.data, rst_ref=r1) assert r1_f32 + r2 == self.from_array(r1_f32.data + r2.data, rst_ref=r1) - # assert array + r2 == self.from_array(array + r2.data, rst_ref=r1) # this case fails as using numpy's add... - assert r2 + array == self.from_array(r2.data + array, rst_ref=r1) + assert array_3d + r2 == self.from_array(array_3d + r2.data, rst_ref=r1) + assert r2 + array_3d == self.from_array(r2.data + array_3d, rst_ref=r1) + assert array_2d + r2 == self.from_array(array_2d[np.newaxis, :, :] + r2.data, rst_ref=r1) + assert r2 + array_2d == self.from_array(r2.data + array_2d[np.newaxis, :, :], rst_ref=r1) assert r1 + floatval == self.from_array(r1.data.astype("float32") + floatval, rst_ref=r1) assert floatval + r1 == self.from_array(floatval + r1.data.astype("float32"), rst_ref=r1) assert r1 + r2 == r2 + r1 @@ -1847,8 +1850,10 @@ def test_ops_2args_implicit(self) -> None: # Multiplication assert r1 * r2 == self.from_array(r1.data * r2.data, rst_ref=r1) assert r1_f32 * r2 == self.from_array(r1_f32.data * r2.data, rst_ref=r1) - # assert array * r2 == self.from_array(array * r2.data, rst_ref=r1) # this case fails as using numpy's mul... - assert r2 * array == self.from_array(r2.data * array, rst_ref=r1) + assert array_3d * r2 == self.from_array(array_3d * r2.data, rst_ref=r1) + assert r2 * array_3d == self.from_array(r2.data * array_3d, rst_ref=r1) + assert array_2d * r2 == self.from_array(array_2d[np.newaxis, :, :] * r2.data, rst_ref=r1) + assert r2 * array_2d == self.from_array(r2.data * array_2d[np.newaxis, :, :], rst_ref=r1) assert r1 * floatval == self.from_array(r1.data.astype("float32") * floatval, rst_ref=r1) assert floatval * r1 == self.from_array(floatval * r1.data.astype("float32"), rst_ref=r1) assert r1 * r2 == r2 * r1 @@ -1856,32 +1861,40 @@ def test_ops_2args_implicit(self) -> None: # Subtraction assert r1 - r2 == self.from_array(r1.data - r2.data, rst_ref=r1) assert r1_f32 - r2 == self.from_array(r1_f32.data - r2.data, rst_ref=r1) - # assert array - r2 == self.from_array(array - r2.data, rst_ref=r1) # this case fails - assert r2 - array == self.from_array(r2.data - array, rst_ref=r1) + assert array_3d - r2 == self.from_array(array_3d - r2.data, rst_ref=r1) + assert r2 - array_3d == self.from_array(r2.data - array_3d, rst_ref=r1) + assert array_2d - r2 == self.from_array(array_2d[np.newaxis, :, :] - r2.data, rst_ref=r1) + assert r2 - array_2d == self.from_array(r2.data - array_2d[np.newaxis, :, :], rst_ref=r1) assert r1 - floatval == self.from_array(r1.data.astype("float32") - floatval, rst_ref=r1) assert floatval - r1 == self.from_array(floatval - r1.data.astype("float32"), rst_ref=r1) # True division assert r1 / r2 == self.from_array(r1.data / r2.data, rst_ref=r1) assert r1_f32 / r2 == self.from_array(r1_f32.data / r2.data, rst_ref=r1) - # assert array / r2 == self.from_array(array / r2.data, rst_ref=r1) # this case fails - assert r2 / array == self.from_array(r2.data / array, rst_ref=r2) + assert array_3d / r2 == self.from_array(array_3d / r2.data, rst_ref=r1) + assert r2 / array_3d == self.from_array(r2.data / array_3d, rst_ref=r2) + assert array_2d / r2 == self.from_array(array_2d[np.newaxis, :, :] / r2.data, rst_ref=r1) + assert r2 / array_2d == self.from_array(r2.data / array_2d[np.newaxis, :, :], rst_ref=r2) assert r1 / floatval == self.from_array(r1.data.astype("float32") / floatval, rst_ref=r1) assert floatval / r1 == self.from_array(floatval / r1.data.astype("float32"), rst_ref=r1) # Floor division assert r1 // r2 == self.from_array(r1.data // r2.data, rst_ref=r1) assert r1_f32 // r2 == self.from_array(r1_f32.data // r2.data, rst_ref=r1) - # assert array // r2 == self.from_array(array // r2.data, rst_ref=r1) # this case fails - assert r2 // array == self.from_array(r2.data // array, rst_ref=r1) + assert array_3d // r2 == self.from_array(array_3d // r2.data, rst_ref=r1) + assert r2 // array_3d == self.from_array(r2.data // array_3d, rst_ref=r1) + assert array_2d // r2 == self.from_array(array_2d[np.newaxis, :, :] // r2.data, rst_ref=r1) + assert r2 // array_2d == self.from_array(r2.data // array_2d[np.newaxis, :, :], rst_ref=r1) assert r1 // floatval == self.from_array(r1.data // floatval, rst_ref=r1) assert floatval // r1 == self.from_array(floatval // r1.data, rst_ref=r1) # Modulo assert r1 % r2 == self.from_array(r1.data % r2.data, rst_ref=r1) assert r1_f32 % r2 == self.from_array(r1_f32.data % r2.data, rst_ref=r1) - # assert array % r2 == self.from_array(array % r2.data, rst_ref=r1) # this case fails - assert r2 % array == self.from_array(r2.data % array, rst_ref=r1) + assert array_3d % r2 == self.from_array(array_3d % r2.data, rst_ref=r1) + assert r2 % array_3d == self.from_array(r2.data % array_3d, rst_ref=r1) + assert array_2d % r2 == self.from_array(array_2d[np.newaxis, :, :] % r2.data, rst_ref=r1) + assert r2 % array_2d == self.from_array(r2.data % array_2d[np.newaxis, :, :], rst_ref=r1) assert r1 % floatval == self.from_array(r1.data.astype("float32") % floatval, rst_ref=r1) @pytest.mark.parametrize("op", ops_2args) # type: ignore From a6fb6455212b25cc737adbd4c9e8c8b5c97884bf Mon Sep 17 00:00:00 2001 From: rhugonne Date: Tue, 13 Sep 2022 08:59:14 +0200 Subject: [PATCH 14/18] Account for amaury comments --- geoutils/georaster/raster.py | 6 +++--- tests/test_georaster.py | 16 ++++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index c91c16e7..7a09aea2 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -1065,7 +1065,7 @@ def __array_ufunc__( {"data": output[1], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} ) - def __array_function__(self, func: Callable[[np.ndarray, Any], Any], types: type, args: Any, kwargs: Any) -> Any: + def __array_function__(self, func: Callable[[np.ndarray, Any], Any], types: tuple[type], args: Any, kwargs: Any) -> Any: """ Method to cast NumPy array function directly on a Raster object by applying it to the masked array. A limited number of function is supported, listed in raster._HANDLED_FUNCTIONS. @@ -1087,10 +1087,10 @@ def __array_function__(self, func: Callable[[np.ndarray, Any], Any], types: type # For percentiles and quantiles, there exist no masked array version, so we compute on the valid data directly elif func.__name__ in ["percentile", "nanpercentile"]: - first_arg = args[0].data.data[~args[0].data.mask] + first_arg = args[0].data.compressed() elif func.__name__ in ["quantile", "nanquantile"]: - first_arg = args[0].data.data[~args[0].data.mask] + first_arg = args[0].data.compressed() # Otherwise, we run the numpy function normally (most take masks into account) else: diff --git a/tests/test_georaster.py b/tests/test_georaster.py index 6fc67008..fd2dcd3f 100644 --- a/tests/test_georaster.py +++ b/tests/test_georaster.py @@ -2058,8 +2058,10 @@ def test_array_ufunc_1nin_1nout(self, ufunc_str: str, nodata_init: None | str, d # For two outputs elif ufunc.nout == 2: - assert np.ma.allequal(ufunc(rst.data)[0], ufunc(rst)[0].data) and np.ma.allequal( - ufunc(rst.data)[1], ufunc(rst)[1].data + outputs_rst = ufunc(rst) + outputs_ma = ufunc(rst.data) + assert np.ma.allequal(outputs_ma[0], outputs_rst[0].data) and np.ma.allequal( + outputs_ma[1], outputs_rst[1].data ) # If the input dtype is not possible, check that NumPy raises a TypeError @@ -2139,8 +2141,10 @@ def test_array_ufunc_2nin_1nout( # For two outputs elif ufunc.nout == 2: - assert np.ma.allequal(ufunc(rst1.data, rst2.data)[0], ufunc(rst1, rst2)[0].data) and np.ma.allequal( - ufunc(rst1.data, rst2.data)[1], ufunc(rst1, rst2)[1].data + outputs_rst = ufunc(rst1, rst2) + outputs_ma = ufunc(rst1.data, rst2.data) + assert np.ma.allequal(outputs_ma[0], outputs_rst[0].data) and np.ma.allequal( + outputs_ma[1], outputs_rst[1].data ) # If the input dtype is not possible, check that NumPy raises a TypeError @@ -2185,11 +2189,11 @@ def test_array_functions(self, arrfunc_str: str, dtype: str, nodata_init: None | arg = 80.0 # For percentiles and quantiles, the statistic is computed after removing the masked values output_rst = arrfunc(rst, arg) - output_ma = arrfunc(rst.data.data[~rst.data.mask], arg) + output_ma = arrfunc(rst.data.compressed(), arg) elif "quantile" in arrfunc_str: arg = 0.8 output_rst = arrfunc(rst, arg) - output_ma = arrfunc(rst.data.data[~rst.data.mask], arg) + output_ma = arrfunc(rst.data.compressed(), arg) elif "median" in arrfunc_str: # For the median, the statistic is computed by masking the values through np.ma.median output_rst = arrfunc(rst) From 3d78d28cd47ebbfa69cb4766c251acfe26f96891 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Tue, 13 Sep 2022 08:59:43 +0200 Subject: [PATCH 15/18] Final linting --- geoutils/georaster/raster.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index 7a09aea2..655ee673 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -1065,7 +1065,9 @@ def __array_ufunc__( {"data": output[1], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} ) - def __array_function__(self, func: Callable[[np.ndarray, Any], Any], types: tuple[type], args: Any, kwargs: Any) -> Any: + def __array_function__( + self, func: Callable[[np.ndarray, Any], Any], types: tuple[type], args: Any, kwargs: Any + ) -> Any: """ Method to cast NumPy array function directly on a Raster object by applying it to the masked array. A limited number of function is supported, listed in raster._HANDLED_FUNCTIONS. From 189fc4d317e11c1e045b30ba630f7b482caee942 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Tue, 13 Sep 2022 09:29:43 +0200 Subject: [PATCH 16/18] Change call to __class__ to from_array --- geoutils/georaster/raster.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index 655ee673..a3bb2fe5 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -1026,7 +1026,7 @@ def __array_ufunc__( if ufunc.nin == 1: # If the universal function has only one output if ufunc.nout == 1: - return self.__class__( + return self.from_array( { "data": getattr(ufunc, method)(inputs[0].data, **kwargs), # type: ignore "transform": self.transform, @@ -1038,16 +1038,16 @@ def __array_ufunc__( # If the universal function has two outputs (Note: no ufunc exists that has three outputs or more) else: output = getattr(ufunc, method)(inputs[0].data, **kwargs) # type: ignore - return self.__class__( + return self.from_array( {"data": output[0], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} - ), self.__class__( + ), self.from_array( {"data": output[1], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} ) # If the universal function takes two inputs (Note: no ufunc exists that has three inputs or more) else: if ufunc.nout == 1: - return self.__class__( + return self.from_array( { "data": getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), # type: ignore "transform": self.transform, @@ -1059,9 +1059,9 @@ def __array_ufunc__( # If the universal function has two outputs (Note: no ufunc exists that has three outputs or more) else: output = getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs) # type: ignore - return self.__class__( + return self.from_array( {"data": output[0], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} - ), self.__class__( + ), self.from_array( {"data": output[1], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} ) From 8d85d02b1eb9a19fc6653437245dcc6cd5976726 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Tue, 13 Sep 2022 09:34:25 +0200 Subject: [PATCH 17/18] Change call to __class__ to from_array --- geoutils/georaster/raster.py | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index a3bb2fe5..35a4f824 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -1027,42 +1027,37 @@ def __array_ufunc__( # If the universal function has only one output if ufunc.nout == 1: return self.from_array( - { - "data": getattr(ufunc, method)(inputs[0].data, **kwargs), # type: ignore - "transform": self.transform, - "crs": self.crs, - "nodata": self.nodata, - } - ) + data=getattr(ufunc, method)(inputs[0].data, **kwargs), + transform=self.transform, + crs=self.crs, + nodata=self.nodata) # If the universal function has two outputs (Note: no ufunc exists that has three outputs or more) else: - output = getattr(ufunc, method)(inputs[0].data, **kwargs) # type: ignore + output = getattr(ufunc, method)(inputs[0].data, **kwargs) return self.from_array( - {"data": output[0], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} + data=output[0], transform=self.transform, crs=self.crs, nodata=self.nodata ), self.from_array( - {"data": output[1], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} + data=output[1], transform=self.transform, crs=self.crs, nodata=self.nodata ) # If the universal function takes two inputs (Note: no ufunc exists that has three inputs or more) else: if ufunc.nout == 1: return self.from_array( - { - "data": getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), # type: ignore - "transform": self.transform, - "crs": self.crs, - "nodata": self.nodata, - } + data=getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), + transform=self.transform, + crs=self.crs, + nodata=self.nodata ) # If the universal function has two outputs (Note: no ufunc exists that has three outputs or more) else: - output = getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs) # type: ignore + output = getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs) return self.from_array( - {"data": output[0], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} + data=output[0], transform=self.transform, crs=self.crs, nodata=self.nodata ), self.from_array( - {"data": output[1], "transform": self.transform, "crs": self.crs, "nodata": self.nodata} + data=output[1], transform=self.transform, crs=self.crs, nodata=self.nodata ) def __array_function__( From 28301355e48c372e49786d7b96381ef7cc18a7b1 Mon Sep 17 00:00:00 2001 From: rhugonne Date: Tue, 13 Sep 2022 09:36:20 +0200 Subject: [PATCH 18/18] Linting --- geoutils/georaster/raster.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/geoutils/georaster/raster.py b/geoutils/georaster/raster.py index 35a4f824..98724279 100644 --- a/geoutils/georaster/raster.py +++ b/geoutils/georaster/raster.py @@ -1027,38 +1027,35 @@ def __array_ufunc__( # If the universal function has only one output if ufunc.nout == 1: return self.from_array( - data=getattr(ufunc, method)(inputs[0].data, **kwargs), - transform=self.transform, - crs=self.crs, - nodata=self.nodata) + data=getattr(ufunc, method)(inputs[0].data, **kwargs), # type: ignore + transform=self.transform, + crs=self.crs, + nodata=self.nodata, + ) # type: ignore # If the universal function has two outputs (Note: no ufunc exists that has three outputs or more) else: - output = getattr(ufunc, method)(inputs[0].data, **kwargs) + output = getattr(ufunc, method)(inputs[0].data, **kwargs) # type: ignore return self.from_array( data=output[0], transform=self.transform, crs=self.crs, nodata=self.nodata - ), self.from_array( - data=output[1], transform=self.transform, crs=self.crs, nodata=self.nodata - ) + ), self.from_array(data=output[1], transform=self.transform, crs=self.crs, nodata=self.nodata) # If the universal function takes two inputs (Note: no ufunc exists that has three inputs or more) else: if ufunc.nout == 1: return self.from_array( - data=getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), - transform=self.transform, - crs=self.crs, - nodata=self.nodata + data=getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs), # type: ignore + transform=self.transform, + crs=self.crs, + nodata=self.nodata, ) # If the universal function has two outputs (Note: no ufunc exists that has three outputs or more) else: - output = getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs) + output = getattr(ufunc, method)(inputs[0].data, inputs[1].data, **kwargs) # type: ignore return self.from_array( data=output[0], transform=self.transform, crs=self.crs, nodata=self.nodata - ), self.from_array( - data=output[1], transform=self.transform, crs=self.crs, nodata=self.nodata - ) + ), self.from_array(data=output[1], transform=self.transform, crs=self.crs, nodata=self.nodata) def __array_function__( self, func: Callable[[np.ndarray, Any], Any], types: tuple[type], args: Any, kwargs: Any