Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Linear interp with NaNs in nd indexer #4233

Merged
merged 10 commits into from
Aug 27, 2020
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ Bug fixes
arrays (:issue:`4341`). By `Spencer Clark <https://github.com/spencerkclark>`_.
- Fix :py:func:`xarray.apply_ufunc` with ``vectorize=True`` and ``exclude_dims`` (:issue:`3890`).
By `Mathias Hauser <https://github.com/mathause>`_.
- Fix `KeyError` when doing linear interpolation to an nd `DataArray`
that contains NaNs (:pull:`4233`).
By `Jens Svensmark <https://github.com/jenssss>`_

Documentation
~~~~~~~~~~~~~
Expand Down
8 changes: 4 additions & 4 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1420,9 +1420,9 @@ def interp(
----------
coords : dict, optional
Mapping from dimension names to the new coordinates.
new coordinate can be an scalar, array-like or DataArray.
If DataArrays are passed as new coordates, their dimensions are
used for the broadcasting.
New coordinate can be an scalar, array-like or DataArray.
If DataArrays are passed as new coordinates, their dimensions are
used for the broadcasting. Missing values are skipped.
method : str, default: "linear"
The method used to interpolate. Choose from

Expand Down Expand Up @@ -1492,7 +1492,7 @@ def interp_like(
other : Dataset or DataArray
Object with an 'indexes' attribute giving a mapping from dimension
names to an 1d array-like, which provides coordinates upon
which to index the variables in this dataset.
which to index the variables in this dataset. Missing values are skipped.
method : str, default: "linear"
The method used to interpolate. Choose from

Expand Down
6 changes: 3 additions & 3 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2599,8 +2599,8 @@ def interp(
coords : dict, optional
Mapping from dimension names to the new coordinates.
New coordinate can be a scalar, array-like or DataArray.
If DataArrays are passed as new coordates, their dimensions are
used for the broadcasting.
If DataArrays are passed as new coordinates, their dimensions are
used for the broadcasting. Missing values are skipped.
method : str, optional
{"linear", "nearest"} for multidimensional array,
{"linear", "nearest", "zero", "slinear", "quadratic", "cubic"}
Expand Down Expand Up @@ -2728,7 +2728,7 @@ def interp_like(
other : Dataset or DataArray
Object with an 'indexes' attribute giving a mapping from dimension
names to an 1d array-like, which provides coordinates upon
which to index the variables in this dataset.
which to index the variables in this dataset. Missing values are skipped.
method : str, optional
{"linear", "nearest"} for multidimensional array,
{"linear", "nearest", "zero", "slinear", "quadratic", "cubic"}
Expand Down
15 changes: 13 additions & 2 deletions xarray/core/missing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime as dt
import warnings
from distutils.version import LooseVersion
from functools import partial
from numbers import Number
from typing import Any, Callable, Dict, Hashable, Sequence, Union
Expand Down Expand Up @@ -550,9 +551,19 @@ def _localize(var, indexes_coords):
"""
indexes = {}
for dim, [x, new_x] in indexes_coords.items():
if np.issubdtype(new_x.dtype, np.datetime64) and LooseVersion(
np.__version__
) < LooseVersion("1.18"):
# np.nanmin/max changed behaviour for datetime types in numpy 1.18,
# see https://github.com/pydata/xarray/pull/3924/files
minval = np.min(new_x.values)
maxval = np.max(new_x.values)
else:
minval = np.nanmin(new_x.values)
maxval = np.nanmax(new_x.values)
index = x.to_index()
imin = index.get_loc(np.min(new_x.values), method="nearest")
imax = index.get_loc(np.max(new_x.values), method="nearest")
imin = index.get_loc(minval, method="nearest")
imax = index.get_loc(maxval, method="nearest")

indexes[dim] = slice(max(imin - 2, 0), imax + 2)
indexes_coords[dim] = (x[indexes[dim]], new_x)
Expand Down
27 changes: 27 additions & 0 deletions xarray/tests/test_interp.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,32 @@ def test_interpolate_nd_nd():
da.interp(a=ia)


@requires_scipy
def test_interpolate_nd_with_nan():
mathause marked this conversation as resolved.
Show resolved Hide resolved
"""Interpolate an array with an nd indexer and `NaN` values."""

# Create indexer into `a` with dimensions (y, x)
x = [0, 1, 2]
y = [10, 20]
c = {"x": x, "y": y}
a = np.arange(6, dtype=float).reshape(2, 3)
a[0, 1] = np.nan
ia = xr.DataArray(a, dims=("y", "x"), coords=c)

da = xr.DataArray([1, 2, 2], dims=("a"), coords={"a": [0, 2, 4]})
out = da.interp(a=ia)
expected = xr.DataArray(
[[1.0, np.nan, 2.0], [2.0, 2.0, np.nan]], dims=("y", "x"), coords=c
)
xr.testing.assert_allclose(out.drop_vars("a"), expected)

db = 2 * da
ds = xr.Dataset({"da": da, "db": db})
out = ds.interp(a=ia)
expected_ds = xr.Dataset({"da": expected, "db": 2 * expected})
xr.testing.assert_allclose(out.drop_vars("a"), expected_ds)


@pytest.mark.parametrize("method", ["linear"])
@pytest.mark.parametrize("case", [0, 1])
def test_interpolate_scalar(method, case):
Expand Down Expand Up @@ -553,6 +579,7 @@ def test_interp_like():
[0.5, 1.5],
),
(["2000-01-01T12:00", "2000-01-02T12:00"], [0.5, 1.5]),
(["2000-01-01T12:00", "2000-01-02T12:00", "NaT"], [0.5, 1.5, np.nan]),
(["2000-01-01T12:00"], 0.5),
pytest.param("2000-01-01T12:00", 0.5, marks=pytest.mark.xfail),
],
Expand Down