Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow dataset interpolation with different datatypes #5008

Merged
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
ca2ef3a
Support interp with different dtypes
Illviljan Mar 7, 2021
6e4aab8
Update dataset.py
Illviljan Mar 7, 2021
f1509e3
copy parts of #4740
Illviljan Mar 7, 2021
8f4024a
Update dataset.py
Illviljan Mar 7, 2021
9062352
Update dataset.py
Illviljan Mar 7, 2021
8b9d605
use reindex for all weird dtypes
Illviljan Mar 7, 2021
b2d8350
Update dataset.py
Illviljan Mar 7, 2021
9ae2030
add test
Illviljan Mar 7, 2021
c202d62
Update test_dataset.py
Illviljan Mar 7, 2021
bdc1a65
Update test_dataset.py
Illviljan Mar 7, 2021
12a721f
Update dataset.py
Illviljan Mar 7, 2021
b449bc9
move reindex up a bit
Illviljan Mar 7, 2021
dd678c8
Update dataset.py
Illviljan Mar 8, 2021
7fdb44b
dont reindex if var has no dims
Illviljan Mar 9, 2021
7faa4a0
Update dataset.py
Illviljan Mar 9, 2021
d380d0f
Merge branch 'master' into Illviljan-dataset_interp_several_dtypes
Illviljan Mar 9, 2021
c4b7927
Update dataset.py
Illviljan Mar 11, 2021
0161669
add parameter for non-numerics
Illviljan Mar 13, 2021
dc12fe4
Update dataset.py
Illviljan Mar 13, 2021
b278b67
Update dataset.py
Illviljan Mar 13, 2021
87afce6
test if order matters
Illviljan Mar 13, 2021
4528fe1
move method_for_numerics to end
Illviljan Mar 13, 2021
a94ea14
Merge branch 'master' into Illviljan-dataset_interp_several_dtypes
Illviljan Apr 14, 2021
0515a1a
Merge branch 'master' into Illviljan-dataset_interp_several_dtypes
Illviljan May 9, 2021
e8c045c
Merge branch 'master' into Illviljan-dataset_interp_several_dtypes
Illviljan May 9, 2021
e15d757
Update xarray/core/dataset.py
Illviljan May 13, 2021
4fbc2d7
Merge branch 'master' into Illviljan-dataset_interp_several_dtypes
Illviljan May 13, 2021
57ce43f
Update xarray/core/dataset.py
Illviljan May 13, 2021
992f51d
method_non_numeric
Illviljan May 13, 2021
274c891
xindexes
Illviljan May 13, 2021
d3f5041
xindexes
Illviljan May 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 46 additions & 4 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2832,6 +2832,7 @@ def interp(
method: str = "linear",
assume_sorted: bool = False,
kwargs: Mapping[str, Any] = None,
method_for_non_numerics: str = "nearest",
Illviljan marked this conversation as resolved.
Show resolved Hide resolved
**coords_kwargs: Any,
) -> "Dataset":
"""Multidimensional interpolation of Dataset.
Expand All @@ -2856,6 +2857,10 @@ def interp(
Additional keyword arguments passed to scipy's interpolator. Valid
options and their behavior depend on if 1-dimensional or
multi-dimensional interpolation is used.
method_for_non_numerics : str, optional
Method for non-numerics where modifying the elements is not
possible. See Dataset.reindex for options. "nearest" is used by
default.
Illviljan marked this conversation as resolved.
Show resolved Hide resolved
**coords_kwargs : {dim: coordinate, ...}, optional
The keyword arguments form of ``coords``.
One of coords or coords_kwargs must be provided.
Expand Down Expand Up @@ -3013,6 +3018,7 @@ def _validate_interp_indexer(x, new_x):
}

variables: Dict[Hashable, Variable] = {}
to_reindex: Dict[Hashable, Variable] = {}
for name, var in obj._variables.items():
if name in indexers:
continue
Expand All @@ -3022,20 +3028,45 @@ def _validate_interp_indexer(x, new_x):
else:
use_indexers = validated_indexers

if var.dtype.kind in "uifc":
dtype_kind = var.dtype.kind
if dtype_kind in "uifc":
# For normal number types do the interpolation:
var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims}
variables[name] = missing.interp(var, var_indexers, method, **kwargs)
elif dtype_kind in "ObU" and (use_indexers.keys() & var.dims):
# For dtypes that cannot be interpolated numerically (object,
# bool, unicode), use stepwise selection instead so that the
# elements are never modified. reindex_variables supports these
# dtypes and retains them, but calling it once per variable
# inside this loop may repeat work and slow things down, so
# collect the variables here and reindex them together later:
to_reindex[name] = var
elif all(d not in indexers for d in var.dims):
# keep unrelated object array
# For anything else we can only keep variables if they
# are not dependent on any coords that are being
# interpolated along:
variables[name] = var

if to_reindex:
# Reindex variables:
variables_reindex = alignment.reindex_variables(
variables=to_reindex,
sizes=obj.sizes,
indexes=obj.indexes,
indexers={k: v[-1] for k, v in validated_indexers.items()},
method=method_for_non_numerics,
)[0]
variables.update(variables_reindex)

# Get the coords that also exist in the variables:
coord_names = obj._coord_names & variables.keys()
# Get the indexes that are not being interpolated along:
indexes = {k: v for k, v in obj.indexes.items() if k not in indexers}
selected = self._replace_with_new_dims(
variables.copy(), coord_names, indexes=indexes
)

# attach indexer as coordinate
# Attach indexer as coordinate
variables.update(indexers)
for k, v in indexers.items():
assert isinstance(v, Variable)
Expand All @@ -3056,6 +3087,7 @@ def interp_like(
method: str = "linear",
assume_sorted: bool = False,
kwargs: Mapping[str, Any] = None,
method_for_non_numerics: str = "nearest",
) -> "Dataset":
"""Interpolate this object onto the coordinates of another object,
filling the out of range values with NaN.
Expand All @@ -3077,6 +3109,10 @@ def interp_like(
values.
kwargs : dict, optional
Additional keyword passed to scipy's interpolator.
method_for_non_numerics : str, optional
Method for non-numerics where modifying the elements is not
possible. See Dataset.reindex for options. "nearest" is used by
default.
Illviljan marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
Expand Down Expand Up @@ -3112,7 +3148,13 @@ def interp_like(
# We do not support interpolation along object coordinates;
# reindex along them instead.
ds = self.reindex(object_coords)
return ds.interp(numeric_coords, method, assume_sorted, kwargs)
return ds.interp(
coords=numeric_coords,
method=method,
assume_sorted=assume_sorted,
kwargs=kwargs,
method_for_non_numerics=method_for_non_numerics,
)

# Helper methods for rename()
def _rename_vars(self, name_dict, dims_dict):
Expand Down
22 changes: 13 additions & 9 deletions xarray/tests/test_interp.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,15 +416,19 @@ def test_errors(use_dask):

@requires_scipy
def test_dtype():
ds = xr.Dataset(
{"var1": ("x", [0, 1, 2]), "var2": ("x", ["a", "b", "c"])},
coords={"x": [0.1, 0.2, 0.3], "z": ("x", ["a", "b", "c"])},
)
actual = ds.interp(x=[0.15, 0.25])
assert "var1" in actual
assert "var2" not in actual
# object array should be dropped
assert "z" not in actual.coords
data_vars = dict(
a=("time", np.array([1, 1.25, 2])),
b=("time", np.array([True, True, False], dtype=bool)),
c=("time", np.array(["start", "start", "end"], dtype=str)),
)
time = np.array([0, 0.25, 1], dtype=float)
expected = xr.Dataset(data_vars, coords=dict(time=time))
actual = xr.Dataset(
{k: (dim, arr[[0, -1]]) for k, (dim, arr) in data_vars.items()},
coords=dict(time=time[[0, -1]]),
)
actual = actual.interp(time=time, method="linear")
assert_identical(expected, actual)


@requires_scipy
Expand Down