From 5377687473ecb78db085b47f4f5774eb1df93970 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 1 Jul 2020 20:04:59 +0200 Subject: [PATCH 01/15] use the latest image of RTD (#4191) --- readthedocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs.yml b/readthedocs.yml index 173d61ec6f3..88aee82a44b 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,7 +1,7 @@ version: 2 build: - image: stable + image: latest conda: environment: ci/requirements/doc.yml From 06c213ead8471520f15ac8378ffe33b36ba4e818 Mon Sep 17 00:00:00 2001 From: David Brochart Date: Thu, 2 Jul 2020 14:09:38 +0200 Subject: [PATCH 02/15] Fix typo (#4192) --- doc/related-projects.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 9891f1a6bc2..8e8e3f63098 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -76,7 +76,7 @@ Visualization - `Datashader `_, `geoviews `_, `holoviews `_, : visualization packages for large data. - `hvplot `_ : A high-level plotting API for the PyData ecosystem built on HoloViews. - `psyplot `_: Interactive data visualization with python. -- `xarray-leaflet `_: An xarray extension for tiles map plotting based on ipyleaflet. +- `xarray-leaflet `_: An xarray extension for tiled map plotting based on ipyleaflet. Non-Python projects ~~~~~~~~~~~~~~~~~~~ From e216720ae8168d3e9fb5470cf45f0d382ebc6e20 Mon Sep 17 00:00:00 2001 From: keewis Date: Thu, 2 Jul 2020 18:13:59 +0200 Subject: [PATCH 03/15] More pint compatibility: silence UnitStrippedWarnings (#4163) * globally promote UnitStrippedWarning to errors * separately test apply_ufunc with units in dims, coords and data * split the DataArray align test into data, dims and coords tests * use dtypes instead of python types and use a dtype specific fill value * rewrite the dataset align tests * compare with dtypes.NA instead of using np.isnan * mention the issue in the xfail reason * make sure the combine_* variants are properly separated from each other * improve the test case names * note that broadcast uses align * properly separate the test cases for concat * always use the same reason when xfailing units in indexes tests * also check that the replication functions work with dims and units * apply full_like to the data instead of the variable * check full_like with units in dims, data and coords separately * clearly separate the test variants of the merge tests * don't use indexes for the dataset where tests * replace numpy.testing.assert_allclose with assert numpy.allclose * remove a conditional xfail that depends on a very old pint version * use assert_identical from the local namespace * properly separate between the broadcast_like test variants * don't accept "data" as an alias of the DataArray's data * properly separate between the variants of the content manipulation tests * use assert np.allclose(...) instead of np.testing.assert_allclose(...) * don't test units in indexes in the isel tests * don't use units in indexes for the head / tail / thin tests * properly separate the variants of more tests * rewrite the squeeze tests * use assert_allclose from the module's namespace * rewrite the copy tests * xfail the equal comparison for a pint version lower than 0.14 * try to implement a duckarray friendly assert_array_equal * add tests for not raising an assertion error * skip only the dask test if it isn't installed * also check using pint if available * add a duckarray version of np.testing.assert_allclose * add both to __all__ * make both available in xarray.tests * don't inherit from VariableSubtests since that was not written to test duck arrays. * test the constant pad mode along with all other modes * remove most pint version checks, now that pint 0.13 has been released * use conda to install pint * xfail the DataArray comparison test until pint's dev version fixed it * add tests for the pad method of DataArray and Dataset * add tests for weighted * update whats-new.rst * replace assert np.allclose(...) with assert_duckarray_allclose(...) * fix the dask fallback * xfail the pint tests for now since there's a bug in pint * use utils.is_array_like and utils.is_scalar --- ci/requirements/py36-min-nep18.yml | 3 +- ci/requirements/py36.yml | 2 +- ci/requirements/py37-windows.yml | 2 +- ci/requirements/py37.yml | 2 +- ci/requirements/py38-all-but-dask.yml | 2 +- ci/requirements/py38.yml | 2 +- doc/whats-new.rst | 2 +- xarray/core/common.py | 2 +- xarray/core/utils.py | 6 + xarray/testing.py | 65 +- xarray/tests/__init__.py | 4 + xarray/tests/test_testing.py | 99 ++ xarray/tests/test_units.py | 1354 ++++++++++++++----------- 13 files changed, 919 insertions(+), 626 deletions(-) diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index cd2b1a18c77..dd543ce4ddf 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -11,6 +11,7 @@ dependencies: - msgpack-python=0.6 # remove once distributed is bumped. distributed GH3491 - numpy=1.17 - pandas=0.25 + - pint=0.13 - pip - pytest - pytest-cov @@ -18,5 +19,3 @@ dependencies: - scipy=1.2 - setuptools=41.2 - sparse=0.8 - - pip: - - pint==0.13 diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml index aa2baf9dcce..a500173f277 100644 --- a/ci/requirements/py36.yml +++ b/ci/requirements/py36.yml @@ -28,6 +28,7 @@ dependencies: - numba - numpy - pandas + - pint - pip - pseudonetcdf - pydap @@ -44,4 +45,3 @@ dependencies: - zarr - pip: - numbagg - - pint diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml index 8b12704d644..e9e5c7a900a 100644 --- a/ci/requirements/py37-windows.yml +++ b/ci/requirements/py37-windows.yml @@ -28,6 +28,7 @@ dependencies: - numba - numpy - pandas + - pint - pip - pseudonetcdf - pydap @@ -44,4 +45,3 @@ dependencies: - zarr - pip: - numbagg - - pint diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index 70c453e8776..dba3926596e 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -28,6 +28,7 @@ dependencies: - numba - numpy - pandas + - pint - pip - pseudonetcdf - pydap @@ -44,4 +45,3 @@ dependencies: - zarr - pip: - numbagg - - pint diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml index 6d76eecbd6a..a375d9e1e5a 100644 --- a/ci/requirements/py38-all-but-dask.yml +++ b/ci/requirements/py38-all-but-dask.yml @@ -25,6 +25,7 @@ dependencies: - numba - numpy - pandas + - pint - pip - pseudonetcdf - pydap @@ -41,4 +42,3 @@ dependencies: - zarr - pip: - numbagg - - pint diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml index 6f35138978c..7dff3a1bd97 100644 --- a/ci/requirements/py38.yml +++ b/ci/requirements/py38.yml @@ -28,6 +28,7 @@ dependencies: - numba - numpy - pandas + - pint - pip - pseudonetcdf - pydap @@ -44,4 +45,3 @@ dependencies: - zarr - pip: - numbagg - - pint diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 086cddee0a0..e4223f2b4e0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -91,7 +91,7 @@ New Features - Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:pull:`3922`, :pull:`4135`) By `Kai Mühlbauer `_ and `Pascal Bourgault `_. -- More support for unit aware arrays with pint (:pull:`3643`, :pull:`3975`) +- More support for unit aware arrays with pint (:pull:`3643`, :pull:`3975`, :pull:`4163`) By `Justus Magin `_. - Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even without ``append_dim``, as long as dimension sizes do not change. diff --git a/xarray/core/common.py b/xarray/core/common.py index f759f4c32dd..67dc0fda461 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1434,7 +1434,7 @@ def _full_like_variable(other, fill_value, dtype: DTypeLike = None): other.shape, fill_value, dtype=dtype, chunks=other.data.chunks ) else: - data = np.full_like(other, fill_value, dtype=dtype) + data = np.full_like(other.data, fill_value, dtype=dtype) return Variable(dims=other.dims, data=data, attrs=other.attrs) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 0542f850b02..668405ba574 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -247,6 +247,12 @@ def is_list_like(value: Any) -> bool: return isinstance(value, list) or isinstance(value, tuple) +def is_array_like(value: Any) -> bool: + return ( + hasattr(value, "ndim") and hasattr(value, "shape") and hasattr(value, "dtype") + ) + + def either_dict_or_kwargs( pos_kwargs: Optional[Mapping[Hashable, T]], kw_kwargs: Mapping[str, T], diff --git a/xarray/testing.py b/xarray/testing.py index 9681503414e..ec479ef09d4 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -11,7 +11,14 @@ from xarray.core.indexes import default_indexes from xarray.core.variable import IndexVariable, Variable -__all__ = ("assert_allclose", "assert_chunks_equal", "assert_equal", "assert_identical") +__all__ = ( + "assert_allclose", + "assert_chunks_equal", + "assert_duckarray_equal", + "assert_duckarray_allclose", + "assert_equal", + "assert_identical", +) def _decode_string_data(data): @@ -148,6 +155,62 @@ def compat_variable(a, b): raise TypeError("{} not supported by assertion comparison".format(type(a))) +def _format_message(x, y, err_msg, verbose): + diff = x - y + abs_diff = max(abs(diff)) + rel_diff = "not implemented" + + n_diff = int(np.count_nonzero(diff)) + n_total = diff.size + + fraction = f"{n_diff} / {n_total}" + percentage = float(n_diff / n_total * 100) + + parts = [ + "Arrays are not equal", + err_msg, + f"Mismatched elements: {fraction} ({percentage:.0f}%)", + f"Max absolute difference: {abs_diff}", + f"Max relative difference: {rel_diff}", + ] + if verbose: + parts += [ + f" x: {x!r}", + f" y: {y!r}", + ] + + return "\n".join(parts) + + +def assert_duckarray_allclose( + actual, desired, rtol=1e-07, atol=0, err_msg="", verbose=True +): + """ Like `np.testing.assert_allclose`, but for duckarrays. """ + __tracebackhide__ = True + + allclose = duck_array_ops.allclose_or_equiv(actual, desired, rtol=rtol, atol=atol) + assert allclose, _format_message(actual, desired, err_msg=err_msg, verbose=verbose) + + +def assert_duckarray_equal(x, y, err_msg="", verbose=True): + """ Like `np.testing.assert_array_equal`, but for duckarrays """ + __tracebackhide__ = True + + if not utils.is_array_like(x) and not utils.is_scalar(x): + x = np.asarray(x) + + if not utils.is_array_like(y) and not utils.is_scalar(y): + y = np.asarray(y) + + if (utils.is_array_like(x) and utils.is_scalar(y)) or ( + utils.is_scalar(x) and utils.is_array_like(y) + ): + equiv = (x == y).all() + else: + equiv = duck_array_ops.array_equiv(x, y) + assert equiv, _format_message(x, y, err_msg=err_msg, verbose=verbose) + + def assert_chunks_equal(a, b): """ Assert that chunksizes along chunked dimensions are equal. diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 40c5cfa267c..9021c4e7dbc 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -16,6 +16,10 @@ from xarray.core.duck_array_ops import allclose_or_equiv # noqa: F401 from xarray.core.indexing import ExplicitlyIndexed from xarray.core.options import set_options +from xarray.testing import ( # noqa: F401 + assert_duckarray_allclose, + assert_duckarray_equal, +) # import mpl and change the backend before other mpl imports try: diff --git a/xarray/tests/test_testing.py b/xarray/tests/test_testing.py index f4961af58e9..39ad250246b 100644 --- a/xarray/tests/test_testing.py +++ b/xarray/tests/test_testing.py @@ -1,7 +1,31 @@ +import numpy as np import pytest import xarray as xr +from . import has_dask + +try: + from dask.array import from_array as dask_from_array +except ImportError: + dask_from_array = lambda x: x + +try: + import pint + + unit_registry = pint.UnitRegistry(force_ndarray_like=True) + + def quantity(x): + return unit_registry.Quantity(x, "m") + + has_pint = True +except ImportError: + + def quantity(x): + return x + + has_pint = False + def test_allclose_regression(): x = xr.DataArray(1.01) @@ -30,3 +54,78 @@ def test_allclose_regression(): def test_assert_allclose(obj1, obj2): with pytest.raises(AssertionError): xr.testing.assert_allclose(obj1, obj2) + + +@pytest.mark.filterwarnings("error") +@pytest.mark.parametrize( + "duckarray", + ( + pytest.param(np.array, id="numpy"), + pytest.param( + dask_from_array, + id="dask", + marks=pytest.mark.skipif(not has_dask, reason="requires dask"), + ), + pytest.param( + quantity, + id="pint", + marks=[ + pytest.mark.skipif(not has_pint, reason="requires pint"), + pytest.mark.xfail( + reason="inconsistencies in the return value of pint's implementation of eq" + ), + ], + ), + ), +) +@pytest.mark.parametrize( + ["obj1", "obj2"], + ( + pytest.param([1e-10, 2], [0.0, 2.0], id="both arrays"), + pytest.param([1e-17, 2], 0.0, id="second scalar"), + pytest.param(0.0, [1e-17, 2], id="first scalar"), + ), +) +def test_assert_duckarray_equal_failing(duckarray, obj1, obj2): + # TODO: actually check the repr + a = duckarray(obj1) + b = duckarray(obj2) + with pytest.raises(AssertionError): + xr.testing.assert_duckarray_equal(a, b) + + +@pytest.mark.filterwarnings("error") +@pytest.mark.parametrize( + "duckarray", + ( + pytest.param(np.array, id="numpy"), + pytest.param( + dask_from_array, + id="dask", + marks=pytest.mark.skipif(not has_dask, reason="requires dask"), + ), + pytest.param( + quantity, + id="pint", + marks=[ + pytest.mark.skipif(not has_pint, reason="requires pint"), + pytest.mark.xfail( + reason="inconsistencies in the return value of pint's implementation of eq" + ), + ], + ), + ), +) +@pytest.mark.parametrize( + ["obj1", "obj2"], + ( + pytest.param([0, 2], [0.0, 2.0], id="both arrays"), + pytest.param([0, 0], 0.0, id="second scalar"), + pytest.param(0.0, [0, 0], id="first scalar"), + ), +) +def test_assert_duckarray_equal(duckarray, obj1, obj2): + a = duckarray(obj1) + b = duckarray(obj2) + + xr.testing.assert_duckarray_equal(a, b) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 20a5f0e8613..619fa10116d 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1,16 +1,16 @@ import functools import operator -from distutils.version import LooseVersion import numpy as np import pandas as pd import pytest import xarray as xr +from xarray.core import dtypes from xarray.core.npcompat import IS_NEP18_ACTIVE -from xarray.testing import assert_allclose, assert_equal, assert_identical -from .test_variable import _PAD_XR_NP_ARGS, VariableSubclassobjects +from . import assert_allclose, assert_duckarray_allclose, assert_equal, assert_identical +from .test_variable import _PAD_XR_NP_ARGS pint = pytest.importorskip("pint") DimensionalityError = pint.errors.DimensionalityError @@ -26,7 +26,7 @@ pytest.mark.skipif( not IS_NEP18_ACTIVE, reason="NUMPY_EXPERIMENTAL_ARRAY_FUNCTION is not enabled" ), - # pytest.mark.filterwarnings("ignore:::pint[.*]"), + pytest.mark.filterwarnings("error::pint.UnitStrippedWarning"), ] @@ -180,12 +180,7 @@ def attach_units(obj, units): new_obj = xr.Dataset(data_vars=data_vars, coords=coords, attrs=obj.attrs) elif isinstance(obj, xr.DataArray): # try the array name, "data" and None, then fall back to dimensionless - data_units = ( - units.get(obj.name, None) - or units.get("data", None) - or units.get(None, None) - or 1 - ) + data_units = units.get(obj.name, None) or units.get(None, None) or 1 data = array_attach_units(obj.data, data_units) @@ -264,7 +259,7 @@ def assert_units_equal(a, b): assert extract_units(a) == extract_units(b) -@pytest.fixture(params=[float, int]) +@pytest.fixture(params=[np.dtype(float), np.dtype(int)], ids=str) def dtype(request): return request.param @@ -364,14 +359,31 @@ def __repr__(self): return f"function_{self.name}" -def test_apply_ufunc_dataarray(dtype): +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), +) +def test_apply_ufunc_dataarray(variant, dtype): + variants = { + "data": (unit_registry.m, 1, 1), + "dims": (1, unit_registry.m, 1), + "coords": (1, 1, unit_registry.m), + } + data_unit, dim_unit, coord_unit = variants.get(variant) func = functools.partial( xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} ) - array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.m - x = np.arange(20) * unit_registry.s - data_array = xr.DataArray(data=array, dims="x", coords={"x": x}) + array = np.linspace(0, 10, 20).astype(dtype) * data_unit + x = np.arange(20) * dim_unit + u = np.linspace(-1, 1, 20) * coord_unit + data_array = xr.DataArray(data=array, dims="x", coords={"x": x, "u": ("x", u)}) expected = attach_units(func(strip_units(data_array)), extract_units(data_array)) actual = func(data_array) @@ -380,20 +392,39 @@ def test_apply_ufunc_dataarray(dtype): assert_identical(expected, actual) -def test_apply_ufunc_dataset(dtype): +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), +) +def test_apply_ufunc_dataset(variant, dtype): + variants = { + "data": (unit_registry.m, 1, 1), + "dims": (1, unit_registry.m, 1), + "coords": (1, 1, unit_registry.s), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + func = functools.partial( xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} ) - array1 = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m - array2 = np.linspace(0, 10, 5).astype(dtype) * unit_registry.m + array1 = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit + array2 = np.linspace(0, 10, 5).astype(dtype) * data_unit + + x = np.arange(5) * dim_unit + y = np.arange(10) * dim_unit - x = np.arange(5) * unit_registry.s - y = np.arange(10) * unit_registry.m + u = np.linspace(-1, 1, 10) * coord_unit ds = xr.Dataset( data_vars={"a": (("x", "y"), array1), "b": ("x", array2)}, - coords={"x": x, "y": y}, + coords={"x": x, "y": y, "u": ("y", u)}, ) expected = attach_units(func(strip_units(ds)), extract_units(ds)) @@ -403,10 +434,6 @@ def test_apply_ufunc_dataset(dtype): assert_identical(expected, actual) -# TODO: remove once pint==0.12 has been released -@pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" -) @pytest.mark.parametrize( "unit,error", ( @@ -424,44 +451,61 @@ def test_apply_ufunc_dataset(dtype): "variant", ( "data", - pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), "coords", ), ) -@pytest.mark.parametrize("fill_value", (10, np.nan)) -def test_align_dataarray(fill_value, variant, unit, error, dtype): +@pytest.mark.parametrize("value", (10, dtypes.NA)) +def test_align_dataarray(value, variant, unit, error, dtype): + if variant == "coords" and ( + value != dtypes.NA or isinstance(unit, unit_registry.Unit) + ): + pytest.xfail( + reason=( + "fill_value is used for both data variables and coords. " + "See https://github.com/pydata/xarray/issues/4165" + ) + ) + + fill_value = dtypes.get_fill_value(dtype) if value == dtypes.NA else value + original_unit = unit_registry.m variants = { - "data": (unit, original_unit, original_unit), - "dims": (original_unit, unit, original_unit), - "coords": (original_unit, original_unit, unit), + "data": ((original_unit, unit), (1, 1), (1, 1)), + "dims": ((1, 1), (original_unit, unit), (1, 1)), + "coords": ((1, 1), (1, 1), (original_unit, unit)), } - data_unit, dim_unit, coord_unit = variants.get(variant) + ( + (data_unit1, data_unit2), + (dim_unit1, dim_unit2), + (coord_unit1, coord_unit2), + ) = variants.get(variant) - array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit - array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit - x = np.arange(2) * original_unit + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit1 + array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit2 - y1 = np.arange(5) * original_unit - y2 = np.arange(2, 7) * dim_unit - y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit - y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit + x = np.arange(2) * dim_unit1 + y1 = np.arange(5) * dim_unit1 + y2 = np.arange(2, 7) * dim_unit2 + + u1 = np.array([3, 5, 7, 8, 9]) * coord_unit1 + u2 = np.array([7, 8, 9, 11, 13]) * coord_unit2 coords1 = {"x": x, "y": y1} coords2 = {"x": x, "y": y2} if variant == "coords": - coords1["y_a"] = ("y", y_a1) - coords2["y_a"] = ("y", y_a2) + coords1["y_a"] = ("y", u1) + coords2["y_a"] = ("y", u2) data_array1 = xr.DataArray(data=array1, coords=coords1, dims=("x", "y")) data_array2 = xr.DataArray(data=array2, coords=coords2, dims=("x", "y")) - fill_value = fill_value * data_unit + fill_value = fill_value * data_unit2 func = function(xr.align, join="outer", fill_value=fill_value) - if error is not None and not ( - np.isnan(fill_value) and not isinstance(fill_value, Quantity) - ): + if error is not None and (value != dtypes.NA or isinstance(fill_value, Quantity)): with pytest.raises(error): func(data_array1, data_array2) @@ -469,7 +513,7 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): stripped_kwargs = { key: strip_units( - convert_units(value, {None: original_unit if data_unit != 1 else None}) + convert_units(value, {None: data_unit1 if data_unit2 != 1 else None}) ) for key, value in func.kwargs.items() } @@ -494,10 +538,6 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): assert_allclose(expected_b, actual_b) -# TODO: remove once pint==0.12 has been released -@pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" -) @pytest.mark.parametrize( "unit,error", ( @@ -515,45 +555,61 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): "variant", ( "data", - pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), "coords", ), ) -@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) -def test_align_dataset(fill_value, unit, variant, error, dtype): +@pytest.mark.parametrize("value", (10, dtypes.NA)) +def test_align_dataset(value, unit, variant, error, dtype): + if variant == "coords" and ( + value != dtypes.NA or isinstance(unit, unit_registry.Unit) + ): + pytest.xfail( + reason=( + "fill_value is used for both data variables and coords. " + "See https://github.com/pydata/xarray/issues/4165" + ) + ) + + fill_value = dtypes.get_fill_value(dtype) if value == dtypes.NA else value + original_unit = unit_registry.m variants = { - "data": (unit, original_unit, original_unit), - "dims": (original_unit, unit, original_unit), - "coords": (original_unit, original_unit, unit), + "data": ((original_unit, unit), (1, 1), (1, 1)), + "dims": ((1, 1), (original_unit, unit), (1, 1)), + "coords": ((1, 1), (1, 1), (original_unit, unit)), } - data_unit, dim_unit, coord_unit = variants.get(variant) + ( + (data_unit1, data_unit2), + (dim_unit1, dim_unit2), + (coord_unit1, coord_unit2), + ) = variants.get(variant) - array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit - array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit1 + array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit2 - x = np.arange(2) * original_unit + x = np.arange(2) * dim_unit1 + y1 = np.arange(5) * dim_unit1 + y2 = np.arange(2, 7) * dim_unit2 - y1 = np.arange(5) * original_unit - y2 = np.arange(2, 7) * dim_unit - y_a1 = np.array([3, 5, 7, 8, 9]) * original_unit - y_a2 = np.array([7, 8, 9, 11, 13]) * coord_unit + u1 = np.array([3, 5, 7, 8, 9]) * coord_unit1 + u2 = np.array([7, 8, 9, 11, 13]) * coord_unit2 coords1 = {"x": x, "y": y1} coords2 = {"x": x, "y": y2} if variant == "coords": - coords1["y_a"] = ("y", y_a1) - coords2["y_a"] = ("y", y_a2) + coords1["u"] = ("y", u1) + coords2["u"] = ("y", u2) ds1 = xr.Dataset(data_vars={"a": (("x", "y"), array1)}, coords=coords1) ds2 = xr.Dataset(data_vars={"a": (("x", "y"), array2)}, coords=coords2) - fill_value = fill_value * data_unit + fill_value = fill_value * data_unit2 func = function(xr.align, join="outer", fill_value=fill_value) - if error is not None and not ( - np.isnan(fill_value) and not isinstance(fill_value, Quantity) - ): + if error is not None and (value != dtypes.NA or isinstance(fill_value, Quantity)): with pytest.raises(error): func(ds1, ds2) @@ -561,14 +617,14 @@ def test_align_dataset(fill_value, unit, variant, error, dtype): stripped_kwargs = { key: strip_units( - convert_units(value, {None: original_unit if data_unit != 1 else None}) + convert_units(value, {None: data_unit1 if data_unit2 != 1 else None}) ) for key, value in func.kwargs.items() } units_a = extract_units(ds1) units_b = extract_units(ds2) expected_a, expected_b = func( - strip_units(ds1), strip_units(convert_units(ds2, units_a)), **stripped_kwargs + strip_units(ds1), strip_units(convert_units(ds2, units_a)), **stripped_kwargs, ) expected_a = attach_units(expected_a, units_a) if isinstance(array2, Quantity): @@ -585,6 +641,7 @@ def test_align_dataset(fill_value, unit, variant, error, dtype): def test_broadcast_dataarray(dtype): + # uses align internally so more thorough tests are not needed array1 = np.linspace(0, 10, 2) * unit_registry.Pa array2 = np.linspace(0, 10, 3) * unit_registry.Pa @@ -606,6 +663,7 @@ def test_broadcast_dataarray(dtype): def test_broadcast_dataset(dtype): + # uses align internally so more thorough tests are not needed array1 = np.linspace(0, 10, 2) * unit_registry.Pa array2 = np.linspace(0, 10, 3) * unit_registry.Pa @@ -657,7 +715,9 @@ def test_broadcast_dataset(dtype): "variant", ( "data", - pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), "coords", ), ) @@ -665,31 +725,35 @@ def test_combine_by_coords(variant, unit, error, dtype): original_unit = unit_registry.m variants = { - "data": (unit, original_unit, original_unit), - "dims": (original_unit, unit, original_unit), - "coords": (original_unit, original_unit, unit), + "data": ((original_unit, unit), (1, 1), (1, 1)), + "dims": ((1, 1), (original_unit, unit), (1, 1)), + "coords": ((1, 1), (1, 1), (original_unit, unit)), } - data_unit, dim_unit, coord_unit = variants.get(variant) - - array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit - array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit - x = np.arange(1, 4) * 10 * original_unit - y = np.arange(2) * original_unit - z = np.arange(3) * original_unit - - other_array1 = np.ones_like(array1) * data_unit - other_array2 = np.ones_like(array2) * data_unit - other_x = np.arange(1, 4) * 10 * dim_unit - other_y = np.arange(2, 4) * dim_unit - other_z = np.arange(3, 6) * coord_unit + ( + (data_unit1, data_unit2), + (dim_unit1, dim_unit2), + (coord_unit1, coord_unit2), + ) = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 + array2 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 + x = np.arange(1, 4) * 10 * dim_unit1 + y = np.arange(2) * dim_unit1 + u = np.arange(3) * coord_unit1 + + other_array1 = np.ones_like(array1) * data_unit2 + other_array2 = np.ones_like(array2) * data_unit2 + other_x = np.arange(1, 4) * 10 * dim_unit2 + other_y = np.arange(2, 4) * dim_unit2 + other_u = np.arange(3, 6) * coord_unit2 ds = xr.Dataset( data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, - coords={"x": x, "y": y, "z": ("x", z)}, + coords={"x": x, "y": y, "u": ("x", u)}, ) other = xr.Dataset( data_vars={"a": (("y", "x"), other_array1), "b": (("y", "x"), other_array2)}, - coords={"x": other_x, "y": other_y, "z": ("x", other_z)}, + coords={"x": other_x, "y": other_y, "u": ("x", other_u)}, ) if error is not None: @@ -728,7 +792,9 @@ def test_combine_by_coords(variant, unit, error, dtype): "variant", ( "data", - pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), "coords", ), ) @@ -736,18 +802,22 @@ def test_combine_nested(variant, unit, error, dtype): original_unit = unit_registry.m variants = { - "data": (unit, original_unit, original_unit), - "dims": (original_unit, unit, original_unit), - "coords": (original_unit, original_unit, unit), + "data": ((original_unit, unit), (1, 1), (1, 1)), + "dims": ((1, 1), (original_unit, unit), (1, 1)), + "coords": ((1, 1), (1, 1), (original_unit, unit)), } - data_unit, dim_unit, coord_unit = variants.get(variant) + ( + (data_unit1, data_unit2), + (dim_unit1, dim_unit2), + (coord_unit1, coord_unit2), + ) = variants.get(variant) - array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit - array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array1 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 + array2 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 - x = np.arange(1, 4) * 10 * original_unit - y = np.arange(2) * original_unit - z = np.arange(3) * original_unit + x = np.arange(1, 4) * 10 * dim_unit1 + y = np.arange(2) * dim_unit1 + z = np.arange(3) * coord_unit1 ds1 = xr.Dataset( data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, @@ -755,35 +825,35 @@ def test_combine_nested(variant, unit, error, dtype): ) ds2 = xr.Dataset( data_vars={ - "a": (("y", "x"), np.ones_like(array1) * data_unit), - "b": (("y", "x"), np.ones_like(array2) * data_unit), + "a": (("y", "x"), np.ones_like(array1) * data_unit2), + "b": (("y", "x"), np.ones_like(array2) * data_unit2), }, coords={ - "x": np.arange(3) * dim_unit, - "y": np.arange(2, 4) * dim_unit, - "z": ("x", np.arange(-3, 0) * coord_unit), + "x": np.arange(3) * dim_unit2, + "y": np.arange(2, 4) * dim_unit2, + "z": ("x", np.arange(-3, 0) * coord_unit2), }, ) ds3 = xr.Dataset( data_vars={ - "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), - "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + "a": (("y", "x"), np.full_like(array1, fill_value=np.nan) * data_unit2), + "b": (("y", "x"), np.full_like(array2, fill_value=np.nan) * data_unit2), }, coords={ - "x": np.arange(3, 6) * dim_unit, - "y": np.arange(4, 6) * dim_unit, - "z": ("x", np.arange(3, 6) * coord_unit), + "x": np.arange(3, 6) * dim_unit2, + "y": np.arange(4, 6) * dim_unit2, + "z": ("x", np.arange(3, 6) * coord_unit2), }, ) ds4 = xr.Dataset( data_vars={ - "a": (("y", "x"), -1 * np.ones_like(array1) * data_unit), - "b": (("y", "x"), -1 * np.ones_like(array2) * data_unit), + "a": (("y", "x"), -1 * np.ones_like(array1) * data_unit2), + "b": (("y", "x"), -1 * np.ones_like(array2) * data_unit2), }, coords={ - "x": np.arange(6, 9) * dim_unit, - "y": np.arange(6, 8) * dim_unit, - "z": ("x", np.arange(6, 9) * coord_unit), + "x": np.arange(6, 9) * dim_unit2, + "y": np.arange(6, 8) * dim_unit2, + "z": ("x", np.arange(6, 9) * coord_unit2), }, ) @@ -828,22 +898,37 @@ def test_combine_nested(variant, unit, error, dtype): "variant", ( "data", - pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", ), ) def test_concat_dataarray(variant, unit, error, dtype): original_unit = unit_registry.m - variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} - data_unit, dims_unit = variants.get(variant) + variants = { + "data": ((original_unit, unit), (1, 1), (1, 1)), + "dims": ((1, 1), (original_unit, unit), (1, 1)), + "coords": ((1, 1), (1, 1), (original_unit, unit)), + } + ( + (data_unit1, data_unit2), + (dim_unit1, dim_unit2), + (coord_unit1, coord_unit2), + ) = variants.get(variant) + + array1 = np.linspace(0, 5, 10).astype(dtype) * data_unit1 + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit2 - array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m - array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit - x1 = np.arange(5, 15) * original_unit - x2 = np.arange(5) * dims_unit + x1 = np.arange(5, 15) * dim_unit1 + x2 = np.arange(5) * dim_unit2 + + u1 = np.linspace(1, 2, 10).astype(dtype) * coord_unit1 + u2 = np.linspace(0, 1, 5).astype(dtype) * coord_unit2 - arr1 = xr.DataArray(data=array1, coords={"x": x1}, dims="x") - arr2 = xr.DataArray(data=array2, coords={"x": x2}, dims="x") + arr1 = xr.DataArray(data=array1, coords={"x": x1, "u": ("x", u1)}, dims="x") + arr2 = xr.DataArray(data=array2, coords={"x": x2, "u": ("x", u2)}, dims="x") if error is not None: with pytest.raises(error): @@ -881,22 +966,37 @@ def test_concat_dataarray(variant, unit, error, dtype): "variant", ( "data", - pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", ), ) def test_concat_dataset(variant, unit, error, dtype): original_unit = unit_registry.m - variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} - data_unit, dims_unit = variants.get(variant) + variants = { + "data": ((original_unit, unit), (1, 1), (1, 1)), + "dims": ((1, 1), (original_unit, unit), (1, 1)), + "coords": ((1, 1), (1, 1), (original_unit, unit)), + } + ( + (data_unit1, data_unit2), + (dim_unit1, dim_unit2), + (coord_unit1, coord_unit2), + ) = variants.get(variant) - array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m - array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit - x1 = np.arange(5, 15) * original_unit - x2 = np.arange(5) * dims_unit + array1 = np.linspace(0, 5, 10).astype(dtype) * data_unit1 + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit2 - ds1 = xr.Dataset(data_vars={"a": ("x", array1)}, coords={"x": x1}) - ds2 = xr.Dataset(data_vars={"a": ("x", array2)}, coords={"x": x2}) + x1 = np.arange(5, 15) * dim_unit1 + x2 = np.arange(5) * dim_unit2 + + u1 = np.linspace(1, 2, 10).astype(dtype) * coord_unit1 + u2 = np.linspace(0, 1, 5).astype(dtype) * coord_unit2 + + ds1 = xr.Dataset(data_vars={"a": ("x", array1)}, coords={"x": x1, "u": ("x", u1)}) + ds2 = xr.Dataset(data_vars={"a": ("x", array2)}, coords={"x": x2, "u": ("x", u2)}) if error is not None: with pytest.raises(error): @@ -915,10 +1015,6 @@ def test_concat_dataset(variant, unit, error, dtype): assert_identical(expected, actual) -# TODO: remove once pint==0.12 has been released -@pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" -) @pytest.mark.parametrize( "unit,error", ( @@ -936,7 +1032,9 @@ def test_concat_dataset(variant, unit, error, dtype): "variant", ( "data", - pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), "coords", ), ) @@ -944,29 +1042,33 @@ def test_merge_dataarray(variant, unit, error, dtype): original_unit = unit_registry.m variants = { - "data": (unit, original_unit, original_unit), - "dims": (original_unit, unit, original_unit), - "coords": (original_unit, original_unit, unit), + "data": ((original_unit, unit), (1, 1), (1, 1)), + "dims": ((1, 1), (original_unit, unit), (1, 1)), + "coords": ((1, 1), (1, 1), (original_unit, unit)), } - data_unit, dim_unit, coord_unit = variants.get(variant) - - array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * original_unit - x1 = np.arange(2) * original_unit - y1 = np.arange(3) * original_unit - u1 = np.linspace(10, 20, 2) * original_unit - v1 = np.linspace(10, 20, 3) * original_unit - - array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit - x2 = np.arange(2, 4) * dim_unit - z2 = np.arange(4) * original_unit - u2 = np.linspace(20, 30, 2) * coord_unit - w2 = np.linspace(10, 20, 4) * original_unit - - array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit - y3 = np.arange(3, 6) * dim_unit - z3 = np.arange(4, 8) * dim_unit - v3 = np.linspace(10, 20, 3) * coord_unit - w3 = np.linspace(10, 20, 4) * coord_unit + ( + (data_unit1, data_unit2), + (dim_unit1, dim_unit2), + (coord_unit1, coord_unit2), + ) = variants.get(variant) + + array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * data_unit1 + x1 = np.arange(2) * dim_unit1 + y1 = np.arange(3) * dim_unit1 + u1 = np.linspace(10, 20, 2) * coord_unit1 + v1 = np.linspace(10, 20, 3) * coord_unit1 + + array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit2 + x2 = np.arange(2, 4) * dim_unit2 + z2 = np.arange(4) * dim_unit1 + u2 = np.linspace(20, 30, 2) * coord_unit2 + w2 = np.linspace(10, 20, 4) * coord_unit1 + + array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit2 + y3 = np.arange(3, 6) * dim_unit2 + z3 = np.arange(4, 8) * dim_unit2 + v3 = np.linspace(10, 20, 3) * coord_unit2 + w3 = np.linspace(10, 20, 4) * coord_unit2 arr1 = xr.DataArray( name="a", @@ -993,31 +1095,22 @@ def test_merge_dataarray(variant, unit, error, dtype): return - units = {name: original_unit for name in list("axyzuvw")} - - convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) - expected_units = { - "a": original_unit, - "u": original_unit, - "v": original_unit, - "w": original_unit, - "x": original_unit, - "y": original_unit, - "z": original_unit, + units = { + "a": data_unit1, + "u": coord_unit1, + "v": coord_unit1, + "w": coord_unit1, + "x": dim_unit1, + "y": dim_unit1, + "z": dim_unit1, } + convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) - expected = convert_units( - attach_units( - xr.merge( - [ - convert_and_strip(arr1), - convert_and_strip(arr2), - convert_and_strip(arr3), - ] - ), - units, + expected = attach_units( + xr.merge( + [convert_and_strip(arr1), convert_and_strip(arr2), convert_and_strip(arr3)] ), - expected_units, + units, ) actual = xr.merge([arr1, arr2, arr3]) @@ -1026,10 +1119,6 @@ def test_merge_dataarray(variant, unit, error, dtype): assert_allclose(expected, actual) -# TODO: remove once pint==0.12 has been released -@pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" -) @pytest.mark.parametrize( "unit,error", ( @@ -1047,7 +1136,9 @@ def test_merge_dataarray(variant, unit, error, dtype): "variant", ( "data", - pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), "coords", ), ) @@ -1055,43 +1146,47 @@ def test_merge_dataset(variant, unit, error, dtype): original_unit = unit_registry.m variants = { - "data": (unit, original_unit, original_unit), - "dims": (original_unit, unit, original_unit), - "coords": (original_unit, original_unit, unit), + "data": ((original_unit, unit), (1, 1), (1, 1)), + "dims": ((1, 1), (original_unit, unit), (1, 1)), + "coords": ((1, 1), (1, 1), (original_unit, unit)), } - data_unit, dim_unit, coord_unit = variants.get(variant) + ( + (data_unit1, data_unit2), + (dim_unit1, dim_unit2), + (coord_unit1, coord_unit2), + ) = variants.get(variant) - array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit - array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array1 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 + array2 = np.zeros(shape=(2, 3), dtype=dtype) * data_unit1 - x = np.arange(11, 14) * original_unit - y = np.arange(2) * original_unit - z = np.arange(3) * original_unit + x = np.arange(11, 14) * dim_unit1 + y = np.arange(2) * dim_unit1 + u = np.arange(3) * coord_unit1 ds1 = xr.Dataset( data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, - coords={"x": x, "y": y, "u": ("x", z)}, + coords={"x": x, "y": y, "u": ("x", u)}, ) ds2 = xr.Dataset( data_vars={ - "a": (("y", "x"), np.ones_like(array1) * data_unit), - "b": (("y", "x"), np.ones_like(array2) * data_unit), + "a": (("y", "x"), np.ones_like(array1) * data_unit2), + "b": (("y", "x"), np.ones_like(array2) * data_unit2), }, coords={ - "x": np.arange(3) * dim_unit, - "y": np.arange(2, 4) * dim_unit, - "u": ("x", np.arange(-3, 0) * coord_unit), + "x": np.arange(3) * dim_unit2, + "y": np.arange(2, 4) * dim_unit2, + "u": ("x", np.arange(-3, 0) * coord_unit2), }, ) ds3 = xr.Dataset( data_vars={ - "a": (("y", "x"), np.full_like(array1, np.nan) * data_unit), - "b": (("y", "x"), np.full_like(array2, np.nan) * data_unit), + "a": (("y", "x"), np.full_like(array1, np.nan) * data_unit2), + "b": (("y", "x"), np.full_like(array2, np.nan) * data_unit2), }, coords={ - "x": np.arange(3, 6) * dim_unit, - "y": np.arange(4, 6) * dim_unit, - "u": ("x", np.arange(3, 6) * coord_unit), + "x": np.arange(3, 6) * dim_unit2, + "y": np.arange(4, 6) * dim_unit2, + "u": ("x", np.arange(3, 6) * coord_unit2), }, ) @@ -1104,15 +1199,9 @@ def test_merge_dataset(variant, unit, error, dtype): units = extract_units(ds1) convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) - expected_units = {name: original_unit for name in list("abxyzu")} - expected = convert_units( - attach_units( - func( - [convert_and_strip(ds1), convert_and_strip(ds2), convert_and_strip(ds3)] - ), - units, - ), - expected_units, + expected = attach_units( + func([convert_and_strip(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), + units, ) actual = func([ds1, ds2, ds3]) @@ -1120,35 +1209,79 @@ def test_merge_dataset(variant, unit, error, dtype): assert_allclose(expected, actual) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), +) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) -def test_replication_dataarray(func, dtype): - array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s - data_array = xr.DataArray(data=array, dims="x") +def test_replication_dataarray(func, variant, dtype): + unit = unit_registry.m + + variants = { + "data": (unit, 1, 1), + "dims": (1, unit, 1), + "coords": (1, 1, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) - numpy_func = getattr(np, func.__name__) - units = extract_units(numpy_func(data_array)) - expected = attach_units(func(data_array), units) + array = np.linspace(0, 10, 20).astype(dtype) * data_unit + x = np.arange(20) * dim_unit + u = np.linspace(0, 1, 20) * coord_unit + + data_array = xr.DataArray(data=array, dims="x", coords={"x": x, "u": ("x", u)}) + units = extract_units(data_array) + units.pop(data_array.name) + + expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) assert_units_equal(expected, actual) assert_identical(expected, actual) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), +) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) -def test_replication_dataset(func, dtype): - array1 = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s - array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa - x = np.arange(20).astype(dtype) * unit_registry.m - y = np.arange(10).astype(dtype) * unit_registry.m - z = y.to(unit_registry.mm) +def test_replication_dataset(func, variant, dtype): + unit = unit_registry.m + + variants = { + "data": ((unit_registry.m, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit, 1), + "coords": ((1, 1), 1, unit), + } + (data_unit1, data_unit2), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 20).astype(dtype) * data_unit1 + array2 = np.linspace(5, 10, 10).astype(dtype) * data_unit2 + x = np.arange(20).astype(dtype) * dim_unit + y = np.arange(10).astype(dtype) * dim_unit + u = np.linspace(0, 1, 10) * coord_unit ds = xr.Dataset( data_vars={"a": ("x", array1), "b": ("y", array2)}, - coords={"x": x, "y": y, "z": ("y", z)}, + coords={"x": x, "y": y, "u": ("y", u)}, ) + units = { + name: unit + for name, unit in extract_units(ds).items() + if name not in ds.data_vars + } - numpy_func = getattr(np, func.__name__) - units = extract_units(ds.map(numpy_func)) expected = attach_units(func(strip_units(ds)), units) actual = func(ds) @@ -1157,37 +1290,40 @@ def test_replication_dataset(func, dtype): assert_identical(expected, actual) -@pytest.mark.xfail( - reason=( - "pint is undecided on how `full_like` should work, so incorrect errors " - "may be expected: hgrecco/pint#882" - ) -) @pytest.mark.parametrize( - "unit,error", + "variant", ( - pytest.param(1, DimensionalityError, id="no_unit"), + "data", pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + pytest.param( + "coords", + marks=pytest.mark.xfail(reason="can't copy quantity into non-quantity"), ), - pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.ms, None, id="compatible_unit"), - pytest.param(unit_registry.s, None, id="identical_unit"), ), - ids=repr, ) -def test_replication_full_like_dataarray(unit, error, dtype): - array = np.linspace(0, 5, 10) * unit_registry.s - data_array = xr.DataArray(data=array, dims="x") +def test_replication_full_like_dataarray(variant, dtype): + # since full_like will strip units and then use the units of the + # fill value, we don't need to try multiple units + unit = unit_registry.m - fill_value = -1 * unit - if error is not None: - with pytest.raises(error): - xr.full_like(data_array, fill_value=fill_value) + variants = { + "data": (unit, 1, 1), + "dims": (1, unit, 1), + "coords": (1, 1, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) - return + array = np.linspace(0, 5, 10) * data_unit + x = np.arange(10) * dim_unit + u = np.linspace(0, 1, 10) * coord_unit + data_array = xr.DataArray(data=array, dims="x", coords={"x": x, "u": ("x", u)}) + + fill_value = -1 * unit_registry.degK - units = {**extract_units(data_array), **{None: unit if unit != 1 else None}} + units = extract_units(data_array) + units[data_array.name] = fill_value.units expected = attach_units( xr.full_like(strip_units(data_array), fill_value=strip_units(fill_value)), units ) @@ -1197,47 +1333,46 @@ def test_replication_full_like_dataarray(unit, error, dtype): assert_identical(expected, actual) -@pytest.mark.xfail( - reason=( - "pint is undecided on how `full_like` should work, so incorrect errors " - "may be expected: hgrecco/pint#882" - ) -) @pytest.mark.parametrize( - "unit,error", + "variant", ( - pytest.param(1, DimensionalityError, id="no_unit"), + "data", pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + pytest.param( + "coords", + marks=pytest.mark.xfail(reason="can't copy quantity into non-quantity"), ), - pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.ms, None, id="compatible_unit"), - pytest.param(unit_registry.s, None, id="identical_unit"), ), - ids=repr, ) -def test_replication_full_like_dataset(unit, error, dtype): - array1 = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s - array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa - x = np.arange(20).astype(dtype) * unit_registry.m - y = np.arange(10).astype(dtype) * unit_registry.m - z = y.to(unit_registry.mm) +def test_replication_full_like_dataset(variant, dtype): + unit = unit_registry.m + + variants = { + "data": ((unit_registry.s, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit, 1), + "coords": ((1, 1), 1, unit), + } + (data_unit1, data_unit2), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 20).astype(dtype) * data_unit1 + array2 = np.linspace(5, 10, 10).astype(dtype) * data_unit2 + x = np.arange(20).astype(dtype) * dim_unit + y = np.arange(10).astype(dtype) * dim_unit + + u = np.linspace(0, 1, 10) * coord_unit ds = xr.Dataset( data_vars={"a": ("x", array1), "b": ("y", array2)}, - coords={"x": x, "y": y, "z": ("y", z)}, + coords={"x": x, "y": y, "u": ("y", u)}, ) - fill_value = -1 * unit - if error is not None: - with pytest.raises(error): - xr.full_like(ds, fill_value=fill_value) - - return + fill_value = -1 * unit_registry.degK units = { **extract_units(ds), - **{name: unit if unit != 1 else None for name in ds.data_vars}, + **{name: unit_registry.degK for name in ds.data_vars}, } expected = attach_units( xr.full_like(strip_units(ds), fill_value=strip_units(fill_value)), units @@ -1308,10 +1443,9 @@ def test_where_dataarray(fill_value, unit, error, dtype): def test_where_dataset(fill_value, unit, error, dtype): array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m array2 = np.linspace(-5, 0, 10).astype(dtype) * unit_registry.m - x = np.arange(10) * unit_registry.s - ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) - cond = x < 5 * unit_registry.s + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}) + cond = array1 < 2 * unit_registry.m fill_value = fill_value * unit if error is not None and not ( @@ -1358,61 +1492,7 @@ def test_dot_dataarray(dtype): assert_identical(expected, actual) -def delete_attrs(*to_delete): - def wrapper(cls): - for item in to_delete: - setattr(cls, item, None) - - return cls - - return wrapper - - -@delete_attrs( - "test_getitem_with_mask", - "test_getitem_with_mask_nd_indexer", - "test_index_0d_string", - "test_index_0d_datetime", - "test_index_0d_timedelta64", - "test_0d_time_data", - "test_index_0d_not_a_time", - "test_datetime64_conversion", - "test_timedelta64_conversion", - "test_pandas_period_index", - "test_1d_reduce", - "test_array_interface", - "test___array__", - "test_copy_index", - "test_concat_number_strings", - "test_concat_fixed_len_str", - "test_concat_mixed_dtypes", - "test_pandas_datetime64_with_tz", - "test_pandas_data", - "test_multiindex", -) -class TestVariable(VariableSubclassobjects): - @staticmethod - def cls(dims, data, *args, **kwargs): - return xr.Variable( - dims, unit_registry.Quantity(data, unit_registry.m), *args, **kwargs - ) - - def example_1d_objects(self): - for data in [ - range(3), - 0.5 * np.arange(3), - 0.5 * np.arange(3, dtype=np.float32), - np.array(["a", "b", "c"], dtype=object), - ]: - yield (self.cls("x", data), data) - - # TODO: remove once pint==0.12 has been released - @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" - ) - def test_real_and_imag(self): - super().test_real_and_imag() - +class TestVariable: @pytest.mark.parametrize( "func", ( @@ -1454,22 +1534,14 @@ def test_aggregation(self, func, dtype): assert_units_equal(expected, actual) assert_allclose(expected, actual) - # TODO: remove once pint==0.12 has been released - @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" - ) def test_aggregate_complex(self): variable = xr.Variable("x", [1, 2j, np.nan] * unit_registry.m) expected = xr.Variable((), (0.5 + 1j) * unit_registry.m) actual = variable.mean() assert_units_equal(expected, actual) - xr.testing.assert_allclose(expected, actual) + assert_allclose(expected, actual) - # TODO: remove once pint==0.12 has been released - @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" - ) @pytest.mark.parametrize( "func", ( @@ -1526,7 +1598,7 @@ def test_numpy_methods(self, func, unit, error, dtype): actual = func(variable, *args, **kwargs) assert_units_equal(expected, actual) - xr.testing.assert_allclose(expected, actual) + assert_allclose(expected, actual) @pytest.mark.parametrize( "func", (method("item", 5), method("searchsorted", 5)), ids=repr @@ -1586,7 +1658,7 @@ def test_raw_numpy_methods(self, func, unit, error, dtype): actual = func(variable, *args, **kwargs) assert_units_equal(expected, actual) - np.testing.assert_allclose(expected, actual) + assert_duckarray_allclose(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr @@ -1609,7 +1681,7 @@ def test_missing_value_detection(self, func): actual = func(variable) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -1655,7 +1727,7 @@ def test_missing_value_fillna(self, unit, error): actual = variable.fillna(value=fill_value) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -1766,12 +1838,8 @@ def test_isel(self, indices, dtype): actual = variable.isel(x=indices) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) - # TODO: remove once pint==0.12 has been released - @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" - ) @pytest.mark.parametrize( "unit,error", ( @@ -1828,7 +1896,7 @@ def test_1d_math(self, func, unit, error, dtype): actual = func(variable, y) assert_units_equal(expected, actual) - xr.testing.assert_allclose(expected, actual) + assert_allclose(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -1877,43 +1945,30 @@ def test_masking(self, func, unit, error, dtype): actual = func(variable, cond, other) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) - def test_squeeze(self, dtype): + @pytest.mark.parametrize("dim", ("x", "y", "z", "t", "all")) + def test_squeeze(self, dim, dtype): shape = (2, 1, 3, 1, 1, 2) names = list("abcdef") + dim_lengths = dict(zip(names, shape)) array = np.ones(shape=shape) * unit_registry.m variable = xr.Variable(names, array) + kwargs = {"dim": dim} if dim != "all" and dim_lengths.get(dim, 0) == 1 else {} expected = attach_units( - strip_units(variable).squeeze(), extract_units(variable) + strip_units(variable).squeeze(**kwargs), extract_units(variable) ) - actual = variable.squeeze() + actual = variable.squeeze(**kwargs) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) - - names = tuple(name for name, size in zip(names, shape) if shape == 1) - for name in names: - expected = attach_units( - strip_units(variable).squeeze(dim=name), extract_units(variable) - ) - actual = variable.squeeze(dim=name) - - assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "func", ( method("coarsen", windows={"y": 2}, func=np.mean), - pytest.param( - method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", - reason="quantile / nanquantile not implemented yet", - ), - ), + method("quantile", q=[0.25, 0.75]), pytest.param( method("rank", dim="x"), marks=pytest.mark.xfail(reason="rank not implemented for non-ndarray"), @@ -1940,7 +1995,7 @@ def test_computation(self, func, dtype): actual = func(variable) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -1986,7 +2041,7 @@ def test_stack(self, dtype): actual = variable.stack(z=("x", "y")) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) def test_unstack(self, dtype): array = np.linspace(0, 5, 3 * 10).astype(dtype) * unit_registry.m @@ -1998,7 +2053,7 @@ def test_unstack(self, dtype): actual = variable.unstack(z={"x": 3, "y": 10}) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2038,7 +2093,7 @@ def test_concat(self, unit, error, dtype): actual = xr.Variable.concat([variable, other], dim="y") assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) def test_set_dims(self, dtype): array = np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m @@ -2051,7 +2106,7 @@ def test_set_dims(self, dtype): actual = variable.set_dims(dims) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) def test_copy(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m @@ -2064,7 +2119,7 @@ def test_copy(self, dtype): actual = variable.copy(data=other) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2105,45 +2160,43 @@ def test_no_conflicts(self, unit, dtype): assert expected == actual + @pytest.mark.parametrize( + "mode", + [ + "constant", + "mean", + "median", + "reflect", + "edge", + pytest.param( + "linear_ramp", + marks=pytest.mark.xfail( + reason="pint bug: https://github.com/hgrecco/pint/issues/1026" + ), + ), + "maximum", + "minimum", + "symmetric", + "wrap", + ], + ) @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) - def test_pad_constant_values(self, dtype, xr_arg, np_arg): - data = np.arange(4 * 3 * 2).reshape(4, 3, 2).astype(dtype) * unit_registry.m + def test_pad(self, mode, xr_arg, np_arg): + data = np.arange(4 * 3 * 2).reshape(4, 3, 2) * unit_registry.m v = xr.Variable(["x", "y", "z"], data) - actual = v.pad(**xr_arg, mode="constant") - expected = xr.Variable( - v.dims, - np.pad( - v.data.astype(float), np_arg, mode="constant", constant_values=np.nan, - ), + expected = attach_units( + strip_units(v).pad(mode=mode, **xr_arg), extract_units(v), ) - xr.testing.assert_identical(expected, actual) - assert_units_equal(expected, actual) - assert isinstance(actual._data, type(v._data)) + actual = v.pad(mode=mode, **xr_arg) - # for the boolean array, we pad False - data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2) - v = xr.Variable(["x", "y", "z"], data) - actual = v.pad(**xr_arg, mode="constant", constant_values=data.flat[0]) - expected = xr.Variable( - v.dims, - np.pad(v.data, np_arg, mode="constant", constant_values=v.data.flat[0]), - ) - xr.testing.assert_identical(actual, expected) assert_units_equal(expected, actual) + assert_equal(actual, expected) @pytest.mark.parametrize( "unit,error", ( - pytest.param( - 1, - DimensionalityError, - id="no_unit", - marks=pytest.mark.xfail( - LooseVersion(pint.__version__) < LooseVersion("0.10.2"), - reason="bug in pint's implementation of np.pad", - ), - ), + pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), @@ -2176,17 +2229,16 @@ def test_pad_unit_constant_value(self, unit, error, dtype): actual = func(variable, constant_values=fill_value) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) class TestDataArray: - @pytest.mark.filterwarnings("error:::pint[.*]") @pytest.mark.parametrize( "variant", ( pytest.param( "with_dims", - marks=pytest.mark.xfail(reason="units in indexes are not supported"), + marks=pytest.mark.xfail(reason="indexes don't support units"), ), "with_coords", "without_coords", @@ -2215,7 +2267,6 @@ def test_init(self, variant, dtype): }.values() ) - @pytest.mark.filterwarnings("error:::pint[.*]") @pytest.mark.parametrize( "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr")) ) @@ -2224,7 +2275,7 @@ def test_init(self, variant, dtype): ( pytest.param( "with_dims", - marks=pytest.mark.xfail(reason="units in indexes are not supported"), + marks=pytest.mark.xfail(reason="indexes don't support units"), ), pytest.param("with_coords"), pytest.param("without_coords"), @@ -2248,10 +2299,6 @@ def test_repr(self, func, variant, dtype): # warnings or errors, but does not check the result func(data_array) - # TODO: remove once pint==0.12 has been released - @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose", - ) @pytest.mark.parametrize( "func", ( @@ -2345,7 +2392,7 @@ def test_unary_operations(self, func, dtype): actual = func(data_array) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -2365,14 +2412,21 @@ def test_binary_operations(self, func, dtype): actual = func(data_array) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "comparison", ( pytest.param(operator.lt, id="less_than"), pytest.param(operator.ge, id="greater_equal"), - pytest.param(operator.eq, id="equal"), + pytest.param( + operator.eq, + id="equal", + marks=pytest.mark.xfail( + # LooseVersion(pint.__version__) < "0.14", + reason="inconsistencies in the return values of pint's eq", + ), + ), ), ) @pytest.mark.parametrize( @@ -2416,7 +2470,7 @@ def test_comparison_operations(self, comparison, unit, error, dtype): ) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "units,error", @@ -2445,7 +2499,7 @@ def test_univariate_ufunc(self, units, error, dtype): actual = func(data_array) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail(reason="needs the type register system for __array_ufunc__") @pytest.mark.parametrize( @@ -2487,11 +2541,11 @@ def test_bivariate_ufunc(self, unit, error, dtype): actual = np.maximum(data_array, 1 * unit) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) actual = np.maximum(1 * unit, data_array) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize("property", ("T", "imag", "real")) def test_numpy_properties(self, property, dtype): @@ -2508,7 +2562,7 @@ def test_numpy_properties(self, property, dtype): actual = getattr(data_array, property) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -2524,7 +2578,7 @@ def test_numpy_methods(self, func, dtype): actual = func(data_array) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) def test_item(self, dtype): array = np.arange(10).astype(dtype) * unit_registry.m @@ -2535,7 +2589,7 @@ def test_item(self, dtype): expected = func(strip_units(data_array)) * unit_registry.m actual = func(data_array) - np.testing.assert_allclose(expected, actual) + assert_duckarray_allclose(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2650,7 +2704,7 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): actual = func(data_array, *args, **kwargs) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr @@ -2673,7 +2727,7 @@ def test_missing_value_detection(self, func, dtype): actual = func(data_array) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose units in data") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) @@ -2691,7 +2745,7 @@ def test_missing_value_filling(self, func, dtype): actual = func(data_array, dim="x") assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2740,7 +2794,7 @@ def test_fillna(self, fill_value, unit, error, dtype): actual = func(data_array, value=value) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) def test_dropna(self, dtype): array = ( @@ -2755,7 +2809,7 @@ def test_dropna(self, dtype): actual = data_array.dropna(dim="x") assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2784,7 +2838,7 @@ def test_isin(self, unit, dtype): actual = data_array.isin(values) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "variant", ("masking", "replacing_scalar", "replacing_array", "dropping") @@ -2838,7 +2892,7 @@ def test_where(self, variant, unit, error, dtype): actual = data_array.where(**kwargs) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail(reason="uses numpy.vectorize") def test_interpolate_na(self): @@ -2854,7 +2908,7 @@ def test_interpolate_na(self): actual = data_array.interpolate_na(dim="x") assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2895,7 +2949,7 @@ def test_combine_first(self, unit, error, dtype): actual = data_array.combine_first(other) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2984,17 +3038,47 @@ def is_compatible(a, b): pytest.param(unit_registry.m, id="identical_unit"), ), ) - def test_broadcast_like(self, unit, dtype): - array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * unit_registry.Pa - array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * unit_registry.Pa + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_broadcast_like(self, variant, unit, dtype): + original_unit = unit_registry.m + + variants = { + "data": ((original_unit, unit), (1, 1), (1, 1)), + "dims": ((1, 1), (original_unit, unit), (1, 1)), + "coords": ((1, 1), (1, 1), (original_unit, unit)), + } + ( + (data_unit1, data_unit2), + (dim_unit1, dim_unit2), + (coord_unit1, coord_unit2), + ) = variants.get(variant) + + array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * data_unit1 + array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * data_unit2 + + x1 = np.arange(2) * dim_unit1 + x2 = np.arange(2) * dim_unit2 + y1 = np.array([0]) * dim_unit1 + y2 = np.arange(3) * dim_unit2 - x1 = np.arange(2) * unit_registry.m - x2 = np.arange(2) * unit - y1 = np.array([0]) * unit_registry.m - y2 = np.arange(3) * unit + u1 = np.linspace(0, 1, 2) * coord_unit1 + u2 = np.linspace(0, 1, 2) * coord_unit2 - arr1 = xr.DataArray(data=array1, coords={"x": x1, "y": y1}, dims=("x", "y")) - arr2 = xr.DataArray(data=array2, coords={"x": x2, "y": y2}, dims=("x", "y")) + arr1 = xr.DataArray( + data=array1, coords={"x": x1, "y": y1, "u": ("x", u1)}, dims=("x", "y") + ) + arr2 = xr.DataArray( + data=array2, coords={"x": x2, "y": y2, "u": ("x", u2)}, dims=("x", "y") + ) expected = attach_units( strip_units(arr1).broadcast_like(strip_units(arr2)), extract_units(arr1) @@ -3002,7 +3086,7 @@ def test_broadcast_like(self, unit, dtype): actual = arr1.broadcast_like(arr2) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -3032,56 +3116,89 @@ def test_broadcast_equals(self, unit, dtype): assert expected == actual + def test_pad(self, dtype): + array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + + data_array = xr.DataArray(data=array, dims="x") + units = extract_units(data_array) + + expected = attach_units(strip_units(data_array).pad(x=(2, 3)), units) + actual = data_array.pad(x=(2, 3)) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) + + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) @pytest.mark.parametrize( "func", ( method("pipe", lambda da: da * 10), - method("assign_coords", y2=("y", np.arange(10) * unit_registry.mm)), + method("assign_coords", w=("y", np.arange(10) * unit_registry.mm)), method("assign_attrs", attr1="value"), - method("rename", x2="x_mm"), - method("swap_dims", {"x": "x2"}), - method( - "expand_dims", - dim={"z": np.linspace(10, 20, 12) * unit_registry.s}, - axis=1, + method("rename", u="v"), + pytest.param( + method("swap_dims", {"x": "u"}), + marks=pytest.mark.xfail(reason="indexes don't support units"), + ), + pytest.param( + method( + "expand_dims", + dim={"z": np.linspace(10, 20, 12) * unit_registry.s}, + axis=1, + ), + marks=pytest.mark.xfail(reason="indexes don't support units"), ), method("drop_vars", "x"), - method("reset_coords", names="x2"), + method("reset_coords", names="u"), method("copy"), method("astype", np.float32), ), ids=repr, ) - def test_content_manipulation(self, func, dtype): - quantity = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) - * unit_registry.pascal - ) - x = np.arange(quantity.shape[0]) * unit_registry.m - y = np.arange(quantity.shape[1]) * unit_registry.m - x2 = x.to(unit_registry.mm) + def test_content_manipulation(self, func, variant, dtype): + unit = unit_registry.m + + variants = { + "data": (unit, 1, 1), + "dims": (1, unit, 1), + "coords": (1, 1, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + quantity = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit + x = np.arange(quantity.shape[0]) * dim_unit + y = np.arange(quantity.shape[1]) * dim_unit + u = np.linspace(0, 1, quantity.shape[0]) * coord_unit data_array = xr.DataArray( - name="data", + name="a", data=quantity, - coords={"x": x, "x2": ("x", x2), "y": y}, + coords={"x": x, "u": ("x", u), "y": y}, dims=("x", "y"), ) stripped_kwargs = { key: array_strip_units(value) for key, value in func.kwargs.items() } - units = {**{"x_mm": x2.units, "x2": x2.units}, **extract_units(data_array)} + units = extract_units(data_array) + units["u"] = getattr(u, "units", None) + units["v"] = getattr(u, "units", None) expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units) actual = func(data_array) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) - @pytest.mark.parametrize( - "func", (pytest.param(method("copy", data=np.arange(20))),), ids=repr - ) @pytest.mark.parametrize( "unit", ( @@ -3090,22 +3207,20 @@ def test_content_manipulation(self, func, dtype): pytest.param(unit_registry.degK, id="with_unit"), ), ) - def test_content_manipulation_with_units(self, func, unit, dtype): + def test_copy(self, unit, dtype): quantity = np.linspace(0, 10, 20, dtype=dtype) * unit_registry.pascal - x = np.arange(len(quantity)) * unit_registry.m - - data_array = xr.DataArray(data=quantity, coords={"x": x}, dims="x") + new_data = np.arange(20) - kwargs = {key: value * unit for key, value in func.kwargs.items()} + data_array = xr.DataArray(data=quantity, dims="x") expected = attach_units( - func(strip_units(data_array)), {None: unit, "x": x.units} + strip_units(data_array).copy(data=new_data), {None: unit} ) - actual = func(data_array, **kwargs) + actual = data_array.copy(data=new_data * unit) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "indices", @@ -3115,10 +3230,10 @@ def test_content_manipulation_with_units(self, func, unit, dtype): ), ) def test_isel(self, indices, dtype): + # TODO: maybe test for units in indexes? array = np.arange(10).astype(dtype) * unit_registry.s - x = np.arange(len(array)) * unit_registry.m - data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") + data_array = xr.DataArray(data=array, dims="x") expected = attach_units( strip_units(data_array).isel(x=indices), extract_units(data_array) @@ -3126,7 +3241,7 @@ def test_isel(self, indices, dtype): actual = data_array.isel(x=indices) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3171,7 +3286,7 @@ def test_sel(self, raw_values, unit, error, dtype): actual = data_array.sel(x=values) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3216,7 +3331,7 @@ def test_loc(self, raw_values, unit, error, dtype): actual = data_array.loc[{"x": values}] assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3261,8 +3376,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype): actual = data_array.drop_sel(x=values) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) + @pytest.mark.parametrize("dim", ("x", "y", "z", "t", "all")) @pytest.mark.parametrize( "shape", ( @@ -3273,36 +3389,22 @@ def test_drop_sel(self, raw_values, unit, error, dtype): pytest.param((1, 10, 1, 20), id="first_and_last_dimension_squeezable"), ), ) - def test_squeeze(self, shape, dtype): + def test_squeeze(self, shape, dim, dtype): + names = "xyzt" + dim_lengths = dict(zip(names, shape)) names = "xyzt" - coords = { - name: np.arange(length).astype(dtype) - * (unit_registry.m if name != "t" else unit_registry.s) - for name, length in zip(names, shape) - } array = np.arange(10 * 20).astype(dtype).reshape(shape) * unit_registry.J - data_array = xr.DataArray( - data=array, coords=coords, dims=tuple(names[: len(shape)]) - ) + data_array = xr.DataArray(data=array, dims=tuple(names[: len(shape)])) + + kwargs = {"dim": dim} if dim != "all" and dim_lengths.get(dim, 0) == 1 else {} expected = attach_units( - strip_units(data_array).squeeze(), extract_units(data_array) + strip_units(data_array).squeeze(**kwargs), extract_units(data_array) ) - actual = data_array.squeeze() + actual = data_array.squeeze(**kwargs) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) - - # try squeezing the dimensions separately - names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) - for index, name in enumerate(names): - expected = attach_units( - strip_units(data_array).squeeze(dim=name), extract_units(data_array) - ) - actual = data_array.squeeze(dim=name) - - assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3310,14 +3412,10 @@ def test_squeeze(self, shape, dtype): ids=repr, ) def test_head_tail_thin(self, func, dtype): + # TODO: works like isel. Maybe also test units in indexes? array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, - } - - data_array = xr.DataArray(data=array, coords=coords, dims=("x", "y")) + data_array = xr.DataArray(data=array, dims=("x", "y")) expected = attach_units( func(strip_units(data_array)), extract_units(data_array) @@ -3325,12 +3423,8 @@ def test_head_tail_thin(self, func, dtype): actual = func(data_array) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) - # TODO: remove once pint==0.12 has been released - @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" - ) @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( "func", @@ -3361,7 +3455,7 @@ def test_interp_reindex(self, variant, func, dtype): actual = func(data_array, x=new_x) assert_units_equal(expected, actual) - xr.testing.assert_allclose(expected, actual) + assert_allclose(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3402,12 +3496,8 @@ def test_interp_reindex_indexing(self, func, unit, error, dtype): actual = func(data_array, x=new_x) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) - # TODO: remove once pint==0.12 has been released - @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" - ) @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( "func", @@ -3439,7 +3529,7 @@ def test_interp_reindex_like(self, variant, func, dtype): actual = func(data_array, other) assert_units_equal(expected, actual) - xr.testing.assert_allclose(expected, actual) + assert_allclose(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3482,7 +3572,7 @@ def test_interp_reindex_like_indexing(self, func, unit, error, dtype): actual = func(data_array, other) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3505,7 +3595,7 @@ def test_stacking_stacked(self, func, dtype): actual = func(stacked) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") def test_to_unstacked_dataset(self, dtype): @@ -3529,7 +3619,7 @@ def test_to_unstacked_dataset(self, dtype): actual = func(data_array) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3564,55 +3654,71 @@ def test_stacking_reordering(self, func, dtype): actual = func(data_array) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) @pytest.mark.parametrize( "func", ( method("diff", dim="x"), method("differentiate", coord="x"), method("integrate", dim="x"), - pytest.param( - method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", - reason="quantile / nanquantile not implemented yet", - ), - ), + method("quantile", q=[0.25, 0.75]), method("reduce", func=np.sum, dim="x"), pytest.param(lambda x: x.dot(x), id="method_dot"), ), ids=repr, ) - def test_computation(self, func, dtype): - array = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m - ) + def test_computation(self, func, variant, dtype): + unit = unit_registry.m - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * unit_registry.s + variants = { + "data": (unit, 1, 1), + "dims": (1, unit, 1), + "coords": (1, 1, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + array = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit + + x = np.arange(array.shape[0]) * dim_unit + y = np.arange(array.shape[1]) * dim_unit + + u = np.linspace(0, 1, array.shape[0]) * coord_unit + + data_array = xr.DataArray( + data=array, coords={"x": x, "y": y, "u": ("x", u)}, dims=("x", "y") + ) # we want to make sure the output unit is correct - units = { - **extract_units(data_array), - **( - {} - if isinstance(func, (function, method)) - else extract_units(func(array.reshape(-1))) - ), - } + units = extract_units(data_array) + if not isinstance(func, (function, method)): + units.update(extract_units(func(array.reshape(-1)))) expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) - # TODO: remove once pint==0.12 has been released - @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), ) @pytest.mark.parametrize( "func", @@ -3632,25 +3738,37 @@ def test_computation(self, func, dtype): reason="numbagg functions are not supported by pint" ), ), + method("weighted", xr.DataArray(data=np.linspace(0, 1, 10), dims="y")), ), ids=repr, ) - def test_computation_objects(self, func, dtype): - array = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m - ) + def test_computation_objects(self, func, variant, dtype): + unit = unit_registry.m - x = np.array([0, 0, 1, 2, 2]) * unit_registry.m - y = np.arange(array.shape[1]) * 3 * unit_registry.s + variants = { + "data": (unit, 1, 1), + "dims": (1, unit, 1), + "coords": (1, 1, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit + + x = np.array([0, 0, 1, 2, 2]) * dim_unit + y = np.arange(array.shape[1]) * 3 * dim_unit - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + u = np.linspace(0, 1, 5) * coord_unit + + data_array = xr.DataArray( + data=array, coords={"x": x, "y": y, "u": ("x", u)}, dims=("x", "y") + ) units = extract_units(data_array) expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() assert_units_equal(expected, actual) - xr.testing.assert_allclose(expected, actual) + assert_allclose(expected, actual) def test_resample(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m @@ -3665,33 +3783,47 @@ def test_resample(self, dtype): actual = func(data_array).mean() assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) @pytest.mark.parametrize( "func", ( - method("assign_coords", z=(["x"], np.arange(5) * unit_registry.s)), + method("assign_coords", z=("x", np.arange(5) * unit_registry.s)), method("first"), method("last"), - pytest.param( - method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", - reason="quantile / nanquantile not implemented yet", - ), - ), + method("quantile", q=[0.25, 0.5, 0.75], dim="x"), ), ids=repr, ) - def test_grouped_operations(self, func, dtype): - array = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m - ) + def test_grouped_operations(self, func, variant, dtype): + unit = unit_registry.m - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * 3 * unit_registry.s + variants = { + "data": (unit, 1, 1), + "dims": (1, unit, 1), + "coords": (1, 1, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + array = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit + + x = np.arange(array.shape[0]) * dim_unit + y = np.arange(array.shape[1]) * 3 * dim_unit + + u = np.linspace(0, 1, array.shape[0]) * coord_unit - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + data_array = xr.DataArray( + data=array, coords={"x": x, "y": y, "u": ("x", u)}, dims=("x", "y") + ) units = {**extract_units(data_array), **{"z": unit_registry.s, "q": None}} stripped_kwargs = { @@ -3708,10 +3840,9 @@ def test_grouped_operations(self, func, dtype): actual = func(data_array.groupby("y")) assert_units_equal(expected, actual) - xr.testing.assert_identical(expected, actual) + assert_identical(expected, actual) -@pytest.mark.filterwarnings("error::pint.UnitStrippedWarning") class TestDataset: @pytest.mark.parametrize( "unit,error", @@ -3796,8 +3927,7 @@ def test_init(self, shared, unit, error, dtype): ( "data", pytest.param( - "dims", - marks=pytest.mark.xfail(reason="units in indexes are not supported"), + "dims", marks=pytest.mark.xfail(reason="indexes don't support units"), ), "coords", ), @@ -4313,7 +4443,7 @@ def test_combine_first(self, variant, unit, error, dtype): ( "data", pytest.param( - "dims", marks=pytest.mark.xfail(reason="units in indexes not supported") + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") ), "coords", ), @@ -4473,6 +4603,19 @@ def test_broadcast_equals(self, unit, dtype): assert expected == actual + def test_pad(self, dtype): + a = np.linspace(0, 5, 10).astype(dtype) * unit_registry.Pa + b = np.linspace(-5, 0, 10).astype(dtype) * unit_registry.degK + + ds = xr.Dataset({"a": ("x", a), "b": ("x", b)}) + units = extract_units(ds) + + expected = attach_units(strip_units(ds).pad(x=(2, 3)), units) + actual = ds.pad(x=(2, 3)) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) + @pytest.mark.parametrize( "func", (method("unstack"), method("reset_index", "v"), method("reorder_levels")), @@ -5011,13 +5154,7 @@ def test_interp_reindex_like_indexing(self, func, unit, error, dtype): method("diff", dim="x"), method("differentiate", coord="x"), method("integrate", coord="x"), - pytest.param( - method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", - reason="nanquantile not implemented yet", - ), - ), + method("quantile", q=[0.25, 0.75]), method("reduce", func=np.sum, dim="x"), method("map", np.fabs), ), @@ -5067,13 +5204,7 @@ def test_computation(self, func, variant, dtype): "func", ( method("groupby", "x"), - pytest.param( - method("groupby_bins", "x", bins=2), - marks=pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", - reason="needs assert_allclose but that does not work with pint", - ), - ), + method("groupby_bins", "x", bins=2), method("coarsen", x=2), pytest.param( method("rolling", x=3), marks=pytest.mark.xfail(reason="strips units") @@ -5084,6 +5215,7 @@ def test_computation(self, func, variant, dtype): reason="numbagg functions are not supported by pint" ), ), + method("weighted", xr.DataArray(data=np.linspace(0, 1, 5), dims="y")), ), ids=repr, ) @@ -5122,11 +5254,7 @@ def test_computation_objects(self, func, variant, dtype): actual = func(ds).mean(*args) assert_units_equal(expected, actual) - # TODO: remove once pint 0.12 has been released - if LooseVersion(pint.__version__) <= "0.12": - assert_equal(expected, actual) - else: - assert_allclose(expected, actual) + assert_allclose(expected, actual) @pytest.mark.parametrize( "variant", @@ -5177,13 +5305,7 @@ def test_resample(self, variant, dtype): method("assign_coords", v=("x", np.arange(5) * unit_registry.s)), method("first"), method("last"), - pytest.param( - method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.12", - reason="nanquantile not implemented", - ), - ), + method("quantile", q=[0.25, 0.5, 0.75], dim="x"), ), ids=repr, ) From 03d409ec35034d78a3a625dcaf1744117587b93c Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Thu, 2 Jul 2020 13:39:00 -0700 Subject: [PATCH 04/15] Improve the speed of from_dataframe with a MultiIndex (by 40x!) (#4184) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add MultiIndexSeries.time_to_xarray() benchmark * Improve the speed of from_dataframe with a MultiIndex Fixes GH-2459 Before: pandas.MultiIndexSeries.time_to_xarray ======= ========= ========== -- subset ------- -------------------- dtype True False ======= ========= ========== int 505±0ms 37.1±0ms float 485±0ms 38.3±0ms ======= ========= ========== After: pandas.MultiIndexSeries.time_to_xarray ======= ========= ========== -- subset ------- -------------------- dtype True False ======= ========= ========== int 11.5±0ms 39.2±0ms float 12.5±0ms 26.6±0ms ======= ========= ========== There are still some cases where we have to fall back to the existing slow implementation, but hopefully they should now be relatively rare. * remove unused import * Simplify converting MultiIndex dataframes * remove comments * remove types with NA * more multiindex dataframe tests * add whats new note * Preserve order of MultiIndex levels in from_dataframe * Add todo note * Rewrite from_dataframe to avoid passing around a dataframe * Require that MultiIndexes are unique even with sparse=True * clarify comment --- asv_bench/benchmarks/pandas.py | 24 ++++++++++++ doc/whats-new.rst | 10 +++-- xarray/core/dataset.py | 67 +++++++++++++++++++++++----------- xarray/core/indexes.py | 13 ++++--- xarray/tests/test_dataset.py | 43 ++++++++++++++++++++++ 5 files changed, 127 insertions(+), 30 deletions(-) create mode 100644 asv_bench/benchmarks/pandas.py diff --git a/asv_bench/benchmarks/pandas.py b/asv_bench/benchmarks/pandas.py new file mode 100644 index 00000000000..42ef18ac0c2 --- /dev/null +++ b/asv_bench/benchmarks/pandas.py @@ -0,0 +1,24 @@ +import numpy as np +import pandas as pd + +from . import parameterized + + +class MultiIndexSeries: + def setup(self, dtype, subset): + data = np.random.rand(100000).astype(dtype) + index = pd.MultiIndex.from_product( + [ + list("abcdefhijk"), + list("abcdefhijk"), + pd.date_range(start="2000-01-01", periods=1000, freq="B"), + ] + ) + series = pd.Series(data, index) + if subset: + series = series[::3] + self.series = series + + @parameterized(["dtype", "subset"], ([int, float], [True, False])) + def time_to_xarray(self, dtype, subset): + self.series.to_xarray() diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e4223f2b4e0..5dc39da5a06 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -49,7 +49,10 @@ Enhancements For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially rather than interpolating in multidimensional space. (:issue:`2223`) By `Keisuke Fujii `_. -- :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep +- Major performance improvement for :py:meth:`Dataset.from_dataframe` when the + dataframe has a MultiIndex (:pull:`4184`). + By `Stephan Hoyer `_. + - :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep coordinate attributes (:pull:`4103`). By `Oriol Abril `_. New Features @@ -133,8 +136,9 @@ Bug fixes By `Deepak Cherian `_. - ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue:`3977`) By `Huite Bootsma `_. -- Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`) - By `Keisuke Fujii `_. +- Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. + (:issue:`3951`, :issue:`4186`) + By `Keisuke Fujii `_ and `Stephan Hoyer `_. - Fix renaming of coords when one or more stacked coords is not in sorted order during stack+groupby+apply operations. (:issue:`3287`, :pull:`3906`) By `Spencer Hill `_ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b46b1d6dce0..5bfddaa710b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4543,11 +4543,10 @@ def to_dataframe(self): return self._to_dataframe(self.dims) def _set_sparse_data_from_dataframe( - self, dataframe: pd.DataFrame, dims: tuple + self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple ) -> None: from sparse import COO - idx = dataframe.index if isinstance(idx, pd.MultiIndex): coords = np.stack([np.asarray(code) for code in idx.codes], axis=0) is_sorted = idx.is_lexsorted() @@ -4557,11 +4556,7 @@ def _set_sparse_data_from_dataframe( is_sorted = True shape = (idx.size,) - for name, series in dataframe.items(): - # Cast to a NumPy array first, in case the Series is a pandas - # Extension array (which doesn't have a valid NumPy dtype) - values = np.asarray(series) - + for name, values in arrays: # In virtually all real use cases, the sparse array will now have # missing values and needs a fill_value. For consistency, don't # special case the rare exceptions (e.g., dtype=int without a @@ -4580,18 +4575,36 @@ def _set_sparse_data_from_dataframe( self[name] = (dims, data) def _set_numpy_data_from_dataframe( - self, dataframe: pd.DataFrame, dims: tuple + self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple ) -> None: - idx = dataframe.index - if isinstance(idx, pd.MultiIndex): - # expand the DataFrame to include the product of all levels - full_idx = pd.MultiIndex.from_product(idx.levels, names=idx.names) - dataframe = dataframe.reindex(full_idx) - shape = tuple(lev.size for lev in idx.levels) - else: - shape = (idx.size,) - for name, series in dataframe.items(): - data = np.asarray(series).reshape(shape) + if not isinstance(idx, pd.MultiIndex): + for name, values in arrays: + self[name] = (dims, values) + return + + shape = tuple(lev.size for lev in idx.levels) + indexer = tuple(idx.codes) + + # We already verified that the MultiIndex has all unique values, so + # there are missing values if and only if the size of output arrays is + # larger that the index. + missing_values = np.prod(shape) > idx.shape[0] + + for name, values in arrays: + # NumPy indexing is much faster than using DataFrame.reindex() to + # fill in missing values: + # https://stackoverflow.com/a/35049899/809705 + if missing_values: + dtype, fill_value = dtypes.maybe_promote(values.dtype) + data = np.full(shape, fill_value, dtype) + else: + # If there are no missing values, keep the existing dtype + # instead of promoting to support NA, e.g., keep integer + # columns as integers. + # TODO: consider removing this special case, which doesn't + # exist for sparse=True. + data = np.zeros(shape, values.dtype) + data[indexer] = values self[name] = (dims, data) @classmethod @@ -4631,7 +4644,19 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas if not dataframe.columns.is_unique: raise ValueError("cannot convert DataFrame with non-unique columns") - idx, dataframe = remove_unused_levels_categories(dataframe.index, dataframe) + idx = remove_unused_levels_categories(dataframe.index) + + if isinstance(idx, pd.MultiIndex) and not idx.is_unique: + raise ValueError( + "cannot convert a DataFrame with a non-unique MultiIndex into xarray" + ) + + # Cast to a NumPy array first, in case the Series is a pandas Extension + # array (which doesn't have a valid NumPy dtype) + # TODO: allow users to control how this casting happens, e.g., by + # forwarding arguments to pandas.Series.to_numpy? + arrays = [(k, np.asarray(v)) for k, v in dataframe.items()] + obj = cls() if isinstance(idx, pd.MultiIndex): @@ -4647,9 +4672,9 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas obj[index_name] = (dims, idx) if sparse: - obj._set_sparse_data_from_dataframe(dataframe, dims) + obj._set_sparse_data_from_dataframe(idx, arrays, dims) else: - obj._set_numpy_data_from_dataframe(dataframe, dims) + obj._set_numpy_data_from_dataframe(idx, arrays, dims) return obj def to_dask_dataframe(self, dim_order=None, set_index=False): diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index a4a5fa2c466..6b7220fdfd4 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -9,7 +9,7 @@ from .variable import Variable -def remove_unused_levels_categories(index, dataframe=None): +def remove_unused_levels_categories(index: pd.Index) -> pd.Index: """ Remove unused levels from MultiIndex and unused categories from CategoricalIndex """ @@ -25,14 +25,15 @@ def remove_unused_levels_categories(index, dataframe=None): else: level = level[index.codes[i]] levels.append(level) + # TODO: calling from_array() reorders MultiIndex levels. It would + # be best to avoid this, if possible, e.g., by using + # MultiIndex.remove_unused_levels() (which does not reorder) on the + # part of the MultiIndex that is not categorical, or by fixing this + # upstream in pandas. index = pd.MultiIndex.from_arrays(levels, names=index.names) elif isinstance(index, pd.CategoricalIndex): index = index.remove_unused_categories() - - if dataframe is None: - return index - dataframe = dataframe.set_index(index) - return dataframe.index, dataframe + return index class Indexes(collections.abc.Mapping): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 0c4082a553e..62e2dd5c4f2 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4013,6 +4013,49 @@ def test_to_and_from_empty_dataframe(self): assert len(actual) == 0 assert expected.equals(actual) + def test_from_dataframe_multiindex(self): + index = pd.MultiIndex.from_product([["a", "b"], [1, 2, 3]], names=["x", "y"]) + df = pd.DataFrame({"z": np.arange(6)}, index=index) + + expected = Dataset( + {"z": (("x", "y"), [[0, 1, 2], [3, 4, 5]])}, + coords={"x": ["a", "b"], "y": [1, 2, 3]}, + ) + actual = Dataset.from_dataframe(df) + assert_identical(actual, expected) + + df2 = df.iloc[[3, 2, 1, 0, 4, 5], :] + actual = Dataset.from_dataframe(df2) + assert_identical(actual, expected) + + df3 = df.iloc[:4, :] + expected3 = Dataset( + {"z": (("x", "y"), [[0, 1, 2], [3, np.nan, np.nan]])}, + coords={"x": ["a", "b"], "y": [1, 2, 3]}, + ) + actual = Dataset.from_dataframe(df3) + assert_identical(actual, expected3) + + df_nonunique = df.iloc[[0, 0], :] + with raises_regex(ValueError, "non-unique MultiIndex"): + Dataset.from_dataframe(df_nonunique) + + def test_from_dataframe_unsorted_levels(self): + # regression test for GH-4186 + index = pd.MultiIndex( + levels=[["b", "a"], ["foo"]], codes=[[0, 1], [0, 0]], names=["lev1", "lev2"] + ) + df = pd.DataFrame({"c1": [0, 2], "c2": [1, 3]}, index=index) + expected = Dataset( + { + "c1": (("lev1", "lev2"), [[0], [2]]), + "c2": (("lev1", "lev2"), [[1], [3]]), + }, + coords={"lev1": ["b", "a"], "lev2": ["foo"]}, + ) + actual = Dataset.from_dataframe(df) + assert_identical(actual, expected) + def test_from_dataframe_non_unique_columns(self): # regression test for GH449 df = pd.DataFrame(np.zeros((2, 2))) From 329cefb99220042a586c2b5fdbea679fd8a89cd5 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 2 Jul 2020 20:51:10 +0000 Subject: [PATCH 05/15] Fix to_unstacked_dataset for single dimension variables. (#4094) --- doc/whats-new.rst | 2 ++ xarray/core/dataarray.py | 2 +- xarray/tests/test_dataset.py | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5dc39da5a06..c7a2b85561b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -171,6 +171,8 @@ Bug fixes By `Mathias Hauser `_. - Fix html repr in untrusted notebooks: fallback to plain text repr. (:pull:`4053`) By `Benoit Bovy `_. +- Fix :py:meth:`DataArray.to_unstacked_dataset` for single-dimension variables. (:issue:`4049`) + By `Deepak Cherian `_ - Fix :py:func:`open_rasterio` for ``WarpedVRT`` with specified ``src_crs``. (:pull:`4104`) By `Dave Cole `_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 0ce76a5e23a..dbc4877fa1d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1961,7 +1961,7 @@ def to_unstacked_dataset(self, dim, level=0): # pull variables out of datarray data_dict = {} for k in variables: - data_dict[k] = self.sel({variable_dim: k}).squeeze(drop=True) + data_dict[k] = self.sel({variable_dim: k}, drop=True).squeeze(drop=True) # unstacked dataset return Dataset(data_dict) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 62e2dd5c4f2..9037013cc79 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3031,6 +3031,14 @@ def test_to_stacked_array_dtype_dims(self): assert y.dims == ("x", "features") def test_to_stacked_array_to_unstacked_dataset(self): + + # single dimension: regression test for GH4049 + arr = xr.DataArray(np.arange(3), coords=[("x", [0, 1, 2])]) + data = xr.Dataset({"a": arr, "b": arr}) + stacked = data.to_stacked_array("y", sample_dims=["x"]) + unstacked = stacked.to_unstacked_dataset("y") + assert_identical(unstacked, data) + # make a two dimensional dataset a, b = create_test_stacked_array() D = xr.Dataset({"a": a, "b": b}) From 834d4c461e523d2a1873617b027d3e20cf255bd2 Mon Sep 17 00:00:00 2001 From: raphael dussin Date: Thu, 2 Jul 2020 16:51:55 -0400 Subject: [PATCH 06/15] Allow passing axis kwargs to plot (#4020) * fix facecolor plot * temp version * finish fix facecolor + solves #3169 * black formatting * add testing * allow cartopy projection to be a kwarg * fix PEP8 comment * black formatting * fix testing, plt not in parameterize * fix testing, allows for no matplotlib * black formating * fix tests without matplotlib * fix some mistakes * isort, mypy * fix mypy * remove empty line * correction from review * correction from 2nd review * updated tests * updated tests * black formatting * follow up correction from review * fix tests * fix tests again * fix bug in tests * fix pb in tests * remove useless line * clean up tests * fix * Add whats-new Co-authored-by: dcherian --- doc/plotting.rst | 9 +++++---- doc/whats-new.rst | 3 +++ xarray/plot/plot.py | 15 +++++++++------ xarray/plot/utils.py | 14 ++++++++++---- xarray/tests/__init__.py | 1 + xarray/tests/test_plot.py | 40 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 68 insertions(+), 14 deletions(-) diff --git a/doc/plotting.rst b/doc/plotting.rst index 72248e31b1e..02ddba1e00c 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -743,12 +743,13 @@ This script will plot the air temperature on a map. air = xr.tutorial.open_dataset("air_temperature").air - ax = plt.axes(projection=ccrs.Orthographic(-80, 35)) - air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()) - ax.set_global() + p = air.isel(time=0).plot( + subplot_kws=dict(projection=ccrs.Orthographic(-80, 35), facecolor="gray"), + transform=ccrs.PlateCarree()) + p.axes.set_global() @savefig plotting_maps_cartopy.png width=100% - ax.coastlines() + p.axes.coastlines() When faceting on maps, the projection can be transferred to the ``plot`` function using the ``subplot_kws`` keyword. The axes for the subplots created diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c7a2b85561b..43b248670f5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,6 +54,9 @@ Enhancements By `Stephan Hoyer `_. - :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep coordinate attributes (:pull:`4103`). By `Oriol Abril `_. +- Axes kwargs such as ``facecolor`` can now be passed to :py:meth:`DataArray.plot` in ``subplot_kws``. + This works for both single axes plots and FacetGrid plots. + By `Raphael Dussin Date: Fri, 3 Jul 2020 02:51:32 +0000 Subject: [PATCH 07/15] Bump minimum versions for 0.16 release (#4175) * Bump minimum versions for 0.16 release * Undo scipy bump * fix bumps --- ci/requirements/py36-min-all-deps.yml | 7 +++---- ci/requirements/py36-min-nep18.yml | 5 ++--- doc/whats-new.rst | 3 +++ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index a72cd000680..c11c52bd19f 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -15,8 +15,8 @@ dependencies: - cfgrib=0.9 - cftime=1.0 - coveralls - - dask=2.5 - - distributed=2.5 + - dask=2.9 + - distributed=2.9 - flake8 - h5netcdf=0.7 - h5py=2.9 # Policy allows for 2.10, but it's a conflict-fest @@ -26,11 +26,10 @@ dependencies: - isort - lxml=4.4 # Optional dep of pydap - matplotlib=3.1 - - msgpack-python=0.6 # remove once distributed is bumped. distributed GH3491 - mypy=0.761 # Must match .pre-commit-config.yaml - nc-time-axis=1.2 - netcdf4=1.4 - - numba=0.44 + - numba=0.46 - numpy=1.15 - pandas=0.25 # - pint # See py36-min-nep18.yml diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index dd543ce4ddf..a9f12abfeae 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -6,9 +6,8 @@ dependencies: # require drastically newer packages than everything else - python=3.6 - coveralls - - dask=2.5 - - distributed=2.5 - - msgpack-python=0.6 # remove once distributed is bumped. distributed GH3491 + - dask=2.9 + - distributed=2.9 - numpy=1.17 - pandas=0.25 - pint=0.13 diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 43b248670f5..378dfb30f25 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,9 @@ v0.16.0 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ +- Minimum supported versions for the following packages have changed: ``dask >=2.9``, + ``distributed>=2.9``. + By `Deepak Cherian `_ - ``groupby`` operations will restore coord dimension order. Pass ``restore_coord_dims=False`` to revert to previous behavior. - :meth:`DataArray.transpose` will now transpose coordinates by default. From 03c8562bda56cbd90e571a5beb41f44fba064813 Mon Sep 17 00:00:00 2001 From: keewis Date: Sat, 4 Jul 2020 19:24:14 +0200 Subject: [PATCH 08/15] get the colorbar label via public methods (#4201) --- xarray/tests/test_plot.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 610730e9eb2..788c26f3b39 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -88,6 +88,13 @@ def easy_array(shape, start=0, stop=1): return a.reshape(shape) +def get_colorbar_label(colorbar): + if colorbar.orientation == "vertical": + return colorbar.ax.get_ylabel() + else: + return colorbar.ax.get_xlabel() + + @requires_matplotlib class PlotTestCase: @pytest.fixture(autouse=True) @@ -1414,7 +1421,7 @@ def test_facetgrid_cbar_kwargs(self): # catch contour case if hasattr(g, "cbar"): - assert g.cbar._label == "test_label" + assert get_colorbar_label(g.cbar) == "test_label" def test_facetgrid_no_cbar_ax(self): a = easy_array((10, 15, 2, 3)) From 64c13918492a4b9ef5431ca7461e039a4bd69c95 Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 7 Jul 2020 16:50:29 +0200 Subject: [PATCH 09/15] pin isort (#4206) * pin isort for now * also pin isort for all other CI * pin isort to the exact version --- ci/requirements/py36-min-all-deps.yml | 2 +- ci/requirements/py36.yml | 2 +- ci/requirements/py37-windows.yml | 2 +- ci/requirements/py37.yml | 2 +- ci/requirements/py38-all-but-dask.yml | 2 +- ci/requirements/py38.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index c11c52bd19f..b14582ca9c2 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -23,7 +23,7 @@ dependencies: - hdf5=1.10 - hypothesis - iris=2.2 - - isort + - isort=4.3.21 - lxml=4.4 # Optional dep of pydap - matplotlib=3.1 - mypy=0.761 # Must match .pre-commit-config.yaml diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml index a500173f277..9ff2c6c49ca 100644 --- a/ci/requirements/py36.yml +++ b/ci/requirements/py36.yml @@ -19,7 +19,7 @@ dependencies: - hdf5 - hypothesis - iris - - isort + - isort=4.3.21 - lxml # Optional dep of pydap - matplotlib - mypy=0.761 # Must match .pre-commit-config.yaml diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml index e9e5c7a900a..19285a35eca 100644 --- a/ci/requirements/py37-windows.yml +++ b/ci/requirements/py37-windows.yml @@ -19,7 +19,7 @@ dependencies: - hdf5 - hypothesis - iris - - isort + - isort=4.3.21 - lxml # Optional dep of pydap - matplotlib - mypy=0.761 # Must match .pre-commit-config.yaml diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index dba3926596e..3fcb4efd009 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -19,7 +19,7 @@ dependencies: - hdf5 - hypothesis - iris - - isort + - isort=4.3.21 - lxml # Optional dep of pydap - matplotlib - mypy=0.761 # Must match .pre-commit-config.yaml diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml index a375d9e1e5a..4e6f0dd5387 100644 --- a/ci/requirements/py38-all-but-dask.yml +++ b/ci/requirements/py38-all-but-dask.yml @@ -16,7 +16,7 @@ dependencies: - h5py - hdf5 - hypothesis - - isort + - isort=4.3.21 - lxml # Optional dep of pydap - matplotlib - mypy=0.761 # Must match .pre-commit-config.yaml diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml index 7dff3a1bd97..4598fcd2790 100644 --- a/ci/requirements/py38.yml +++ b/ci/requirements/py38.yml @@ -19,7 +19,7 @@ dependencies: - hdf5 - hypothesis - iris - - isort + - isort=4.3.21 - lxml # Optional dep of pydap - matplotlib - mypy=0.780 # Must match .pre-commit-config.yaml From f3ca63a4ac5c091a92085b477a0d34c08df88aa6 Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 7 Jul 2020 16:52:26 +0200 Subject: [PATCH 10/15] fix sphinx warnings (#4199) * fix a link * remove the earlier attempts to document .str and .dt * fail warnings on RTD * disable fail_on_warning again --- doc/api.rst | 2 -- doc/whats-new.rst | 2 +- readthedocs.yml | 3 +++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 603e3e8f6cf..72a6dd4d97a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -360,7 +360,6 @@ Computation DataArray.rolling_exp DataArray.weighted DataArray.coarsen - DataArray.dt DataArray.resample DataArray.get_axis_num DataArray.diff @@ -369,7 +368,6 @@ Computation DataArray.differentiate DataArray.integrate DataArray.polyfit - DataArray.str DataArray.map_blocks diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 378dfb30f25..b33f817e9e1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -59,7 +59,7 @@ Enhancements coordinate attributes (:pull:`4103`). By `Oriol Abril `_. - Axes kwargs such as ``facecolor`` can now be passed to :py:meth:`DataArray.plot` in ``subplot_kws``. This works for both single axes plots and FacetGrid plots. - By `Raphael Dussin `_. New Features ~~~~~~~~~~~~ diff --git a/readthedocs.yml b/readthedocs.yml index 88aee82a44b..88abb57ae43 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -6,4 +6,7 @@ build: conda: environment: ci/requirements/doc.yml +sphinx: + fail_on_warning: false + formats: [] From c90d6dc1f68bcc857c5e0a19c8da75e68b76673f Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 11 Jul 2020 16:33:18 -0400 Subject: [PATCH 11/15] Minor reorg of whatsnew for 0.16.0 (#4216) --- doc/whats-new.rst | 75 ++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b33f817e9e1..eda89f8c0c1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -16,9 +16,21 @@ What's New .. _whats-new.0.16.0: -v0.16.0 (unreleased) +v0.16.0 (2020-07-11) --------------------- +Thank you to all contributors who built this release: + +Akio Taniguchi, Andrew Williams, Aurélien Ponte, Benoit Bovy, Dave Cole, David +Brochart, Deepak Cherian, Elliott Sales de Andrade, Etienne Combrisson, Hossein +Madadi, Huite, Joe Hamman, Kai Mühlbauer, Keisuke Fujii, Maik Riechert, Marek +Jacob, Mathias Hauser, Matthieu Ancellin, Maximilian Roos, Noah D Brenowitz, +Oriol Abril, Pascal Bourgault, Phillip Butcher, Prajjwal Nijhara, Ray Bell, Ryan +Abernathey, Ryan May, Spencer Clark, Spencer Hill, Srijan Saurav, Stephan Hoyer, +Taher Chegini, Todd, Tom Nicholas, Yohai Bar Sinai, Yunus Sevinchan, +arabidopsis, aurghs, clausmichele, dmey, johnomotani, keewis, raphael dussin, +risebell + Breaking changes ~~~~~~~~~~~~~~~~ @@ -46,21 +58,6 @@ Breaking changes default (:issue:`4176`) By `Stephan Hoyer `_. -Enhancements -~~~~~~~~~~~~ -- Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp` - For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially - rather than interpolating in multidimensional space. (:issue:`2223`) - By `Keisuke Fujii `_. -- Major performance improvement for :py:meth:`Dataset.from_dataframe` when the - dataframe has a MultiIndex (:pull:`4184`). - By `Stephan Hoyer `_. - - :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep - coordinate attributes (:pull:`4103`). By `Oriol Abril `_. -- Axes kwargs such as ``facecolor`` can now be passed to :py:meth:`DataArray.plot` in ``subplot_kws``. - This works for both single axes plots and FacetGrid plots. - By `Raphael Dussin `_. - New Features ~~~~~~~~~~~~ - :py:meth:`DataArray.argmin` and :py:meth:`DataArray.argmax` now support @@ -70,15 +67,19 @@ New Features (:pull:`3936`) By `John Omotani `_, thanks to `Keisuke Fujii `_ for work in :pull:`1469`. +- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`). + By `Andrew Williams `_ and `Robin Beer `_. +- Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, + :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`) + By `Todd Jennings `_ +- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting + polynomials. (:issue:`3349`, :pull:`3733`, :pull:`4099`) + By `Pascal Bourgault `_. - Added :py:meth:`xarray.infer_freq` for extending frequency inferring to CFTime indexes and data (:pull:`4033`). By `Pascal Bourgault `_. - ``chunks='auto'`` is now supported in the ``chunks`` argument of :py:meth:`Dataset.chunk`. (:issue:`4055`) By `Andrew Williams `_ -- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`). - By `Andrew Williams `_ and `Robin Beer `_. -- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`, :pull:`3733`, :pull:`4099`) - By `Pascal Bourgault `_. - Control over attributes of result in :py:func:`merge`, :py:func:`concat`, :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) @@ -88,15 +89,6 @@ New Features the exception when a dimension passed to ``isel`` is not present with a warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`) By `John Omotani `_ -- Limited the length of array items with long string reprs to a - reasonable width (:pull:`3900`) - By `Maximilian Roos `_ -- Limited the number of lines of large arrays when numpy reprs would have greater than 40. - (:pull:`3905`) - By `Maximilian Roos `_ -- Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, - :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`) - By `Todd Jennings `_ - Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:pull:`3922`, :pull:`4135`) By `Kai Mühlbauer `_ and `Pascal Bourgault `_. @@ -128,7 +120,30 @@ New Features (:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_dataarray`, :py:func:`xarray.decode_cf`) that allows to disable/enable the decoding of timedeltas independently of time decoding (:issue:`1621`) - `Aureliana Barghini ` + `Aureliana Barghini `_ + +Enhancements +~~~~~~~~~~~~ +- Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp` + For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially + rather than interpolating in multidimensional space. (:issue:`2223`) + By `Keisuke Fujii `_. +- Major performance improvement for :py:meth:`Dataset.from_dataframe` when the + dataframe has a MultiIndex (:pull:`4184`). + By `Stephan Hoyer `_. + - :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep + coordinate attributes (:pull:`4103`). By `Oriol Abril `_. +- Axes kwargs such as ``facecolor`` can now be passed to :py:meth:`DataArray.plot` in ``subplot_kws``. + This works for both single axes plots and FacetGrid plots. + By `Raphael Dussin `_. +- Array items with long string reprs are now limited to a + reasonable width (:pull:`3900`) + By `Maximilian Roos `_ +- Large arrays whose numpy reprs would have greater than 40 lines are now + limited to a reasonable length. + (:pull:`3905`) + By `Maximilian Roos `_ + Bug fixes ~~~~~~~~~ From 567692634a56a13076a3ad39a46927a613d9e13f Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 11 Jul 2020 13:35:44 -0700 Subject: [PATCH 12/15] Release v0.16.0 From c8d452804dd0058ce5d3429215b4152ac3322a37 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 11 Jul 2020 13:41:19 -0700 Subject: [PATCH 13/15] New whatsnew section --- doc/whats-new.rst | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index eda89f8c0c1..c4f6108629c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,6 +14,31 @@ What's New np.random.seed(123456) +.. _whats-new.0.16.1: + +v0.16.1 (unreleased) +--------------------- + +Breaking changes +~~~~~~~~~~~~~~~~ + + +New Features +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ + + +Internal Changes +~~~~~~~~~~~~~~~~ + + .. _whats-new.0.16.0: v0.16.0 (2020-07-11) From 7bf9df9d75c40bcbf2dd28c47204529a76561a3f Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 11 Jul 2020 14:17:02 -0700 Subject: [PATCH 14/15] Add 0.16.0 release summary --- doc/whats-new.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c4f6108629c..d086d4f411d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,7 +44,11 @@ Internal Changes v0.16.0 (2020-07-11) --------------------- -Thank you to all contributors who built this release: +This release adds `xarray.cov` & `xarray.corr` for covariance & correlation +respectively; the `idxmax` & `idxmin` methods, the `polyfit` method & +`xarray.polyval` for fitting polynomials, as well as a number of documentation +improvements, other features, and bug fixes. Many thanks to all 44 contributors +who contributed to this release: Akio Taniguchi, Andrew Williams, Aurélien Ponte, Benoit Bovy, Dave Cole, David Brochart, Deepak Cherian, Elliott Sales de Andrade, Etienne Combrisson, Hossein From 52043bc57f20438e8923790bca90b646c82442ad Mon Sep 17 00:00:00 2001 From: Jacob Tomlinson Date: Mon, 13 Jul 2020 16:07:43 +0100 Subject: [PATCH 15/15] Add initial cupy tests (#4214) * Add initial cupy tests * Linting * Docstrings --- xarray/tests/test_cupy.py | 50 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 xarray/tests/test_cupy.py diff --git a/xarray/tests/test_cupy.py b/xarray/tests/test_cupy.py new file mode 100644 index 00000000000..624e78d9271 --- /dev/null +++ b/xarray/tests/test_cupy.py @@ -0,0 +1,50 @@ +import numpy as np +import pandas as pd +import pytest + +import xarray as xr + +cp = pytest.importorskip("cupy") + + +@pytest.fixture +def toy_weather_data(): + """Construct the example DataSet from the Toy weather data example. + + http://xarray.pydata.org/en/stable/examples/weather-data.html + + Here we construct the DataSet exactly as shown in the example and then + convert the numpy arrays to cupy. + + """ + np.random.seed(123) + times = pd.date_range("2000-01-01", "2001-12-31", name="time") + annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28)) + + base = 10 + 15 * annual_cycle.reshape(-1, 1) + tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3) + tmax_values = base + 10 + 3 * np.random.randn(annual_cycle.size, 3) + + ds = xr.Dataset( + { + "tmin": (("time", "location"), tmin_values), + "tmax": (("time", "location"), tmax_values), + }, + {"time": times, "location": ["IA", "IN", "IL"]}, + ) + + ds.tmax.data = cp.asarray(ds.tmax.data) + ds.tmin.data = cp.asarray(ds.tmin.data) + + return ds + + +def test_cupy_import(): + """Check the import worked.""" + assert cp + + +def test_check_data_stays_on_gpu(toy_weather_data): + """Perform some operations and check the data stays on the GPU.""" + freeze = (toy_weather_data["tmin"] <= 0).groupby("time.month").mean("time") + assert isinstance(freeze.data, cp.core.core.ndarray)