From ca2ef3a461400e92774c66ef65477b83019dd88e Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 17:18:01 +0100 Subject: [PATCH 01/26] Support interp with different dtypes --- xarray/core/dataset.py | 43 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index db45157e7c1..548d4e1f5ae 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2890,18 +2890,33 @@ def _validate_interp_indexer(x, new_x): if name in indexers: continue - if var.dtype.kind in "uifc": - var_indexers = { - k: _validate_interp_indexer(maybe_variable(obj, k), v) - for k, v in indexers.items() - if k in var.dims - } - variables[name] = missing.interp(var, var_indexers, method, **kwargs) + dtype_kind = var.dtype.kind + if dtype_kind in "uifcb": + if dtype_kind == "b" + # For types that we do not understand do stepwise + # interpolation to avoid modifying the elements: + _method = "nearest" + else: + # For normal number types do the interpolation: + _method = method + + var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} + variables[name] = missing.interp(var, var_indexers, _method, **kwargs) + elif dtype_kind == "O": + # ds.reindex seems faster than missing.interp and + # supports objects but inside this loop there might be + # some duplicate code that slows it down, therefore add + # these signals together and run it later: + reindex[name] = var elif all(d not in indexers for d in var.dims): - # keep unrelated object array + # For anything else we can only keep variables if they + # are not dependent on any coords that are being + # interpolated along: variables[name] = var + # Get the coords that also exist in the variables: coord_names = obj._coord_names & variables.keys() + # Get the indexes that are not being interpolated along: indexes = {k: v for k, v in obj.indexes.items() if k not in indexers} selected = self._replace_with_new_dims( variables.copy(), coord_names, indexes=indexes @@ -2914,6 +2929,18 @@ def _validate_interp_indexer(x, new_x): if v.dims == (k,): indexes[k] = v.to_index() + # TODO: Where should this be? + # Reindex variables: + if len(reindex) > 0: + variables_reindex = alignment.reindex_variables( + variables=reindex, + sizes=obj.sizes, + indexes=obj.indexes, + indexers={k: v[-1] for k, v in validated_indexers.items()}, + method="nearest", + )[0] + variables.update(variables_reindex) + # Extract coordinates from indexers coord_vars, new_indexes = selected._get_indexers_coords_and_indexes(coords) variables.update(coord_vars) From 6e4aab8389d4a34f11b47542aa1ef8cae536d28b Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 17:22:58 +0100 Subject: [PATCH 02/26] Update dataset.py --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 548d4e1f5ae..5a9d720c3cd 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2892,7 +2892,7 @@ def _validate_interp_indexer(x, new_x): dtype_kind = var.dtype.kind if dtype_kind in "uifcb": - if dtype_kind == "b" + if dtype_kind == "b": # For types that we do not understand do stepwise # interpolation to avoid modifying the elements: _method = "nearest" From f1509e3499979e53fd1a5b7628f60f099422c171 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 17:27:13 +0100 Subject: [PATCH 03/26] copy parts of #4740 --- xarray/core/dataset.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5a9d720c3cd..f438fe4943f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2885,7 +2885,28 @@ def _validate_interp_indexer(x, new_x): ) return x, new_x + validated_indexers = { + k: _validate_interp_indexer(maybe_variable(obj, k), v) + for k, v in indexers.items() + } + + # optimization: subset to coordinate range of the target index + if method in ["linear", "nearest"]: + for k, v in validated_indexers.items(): + obj, newidx = missing._localize(obj, {k: v}) + validated_indexers[k] = newidx[k] + + # optimization: create dask coordinate arrays once per Dataset + # rather than once per Variable when dask.array.unify_chunks is called later + # GH4739 + if obj.__dask_graph__(): + dask_indexers = { + k: (index.to_base_variable().chunk(), dest.to_base_variable().chunk()) + for k, (index, dest) in validated_indexers.items() + } + variables: Dict[Hashable, Variable] = {} + reindex: Dict[Hashable, Variable] = {} for name, var in obj._variables.items(): if name in indexers: continue From 8f4024a8bc141f49d3a85258388b21140f2ed905 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 17:29:49 +0100 Subject: [PATCH 04/26] Update dataset.py --- xarray/core/dataset.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f438fe4943f..45f964243f8 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2911,6 +2911,11 @@ def _validate_interp_indexer(x, new_x): if name in indexers: continue + if is_duck_dask_array(var.data): + use_indexers = dask_indexers + else: + use_indexers = validated_indexers + dtype_kind = var.dtype.kind if dtype_kind in "uifcb": if dtype_kind == "b": From 906235232fd35ec8ccf5b946d0739d2a4d85574a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 18:08:54 +0100 Subject: [PATCH 05/26] Update dataset.py --- xarray/core/dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 45f964243f8..8f7a442b920 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2918,13 +2918,13 @@ def _validate_interp_indexer(x, new_x): dtype_kind = var.dtype.kind if dtype_kind in "uifcb": + # For normal number types do the interpolation: + _method = method + if dtype_kind == "b": # For types that we do not understand do stepwise # interpolation to avoid modifying the elements: _method = "nearest" - else: - # For normal number types do the interpolation: - _method = method var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} variables[name] = missing.interp(var, var_indexers, _method, **kwargs) From 8b9d605804b3be11a4aa3552a590db50df56a704 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 19:50:01 +0100 Subject: [PATCH 06/26] use reindex for all weird dtypes --- xarray/core/dataset.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8f7a442b920..05c34c761c9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2906,7 +2906,7 @@ def _validate_interp_indexer(x, new_x): } variables: Dict[Hashable, Variable] = {} - reindex: Dict[Hashable, Variable] = {} + to_reindex: Dict[Hashable, Variable] = {} for name, var in obj._variables.items(): if name in indexers: continue @@ -2917,23 +2917,18 @@ def _validate_interp_indexer(x, new_x): use_indexers = validated_indexers dtype_kind = var.dtype.kind - if dtype_kind in "uifcb": + if dtype_kind in "uifc": # For normal number types do the interpolation: - _method = method - - if dtype_kind == "b": - # For types that we do not understand do stepwise - # interpolation to avoid modifying the elements: - _method = "nearest" - var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} variables[name] = missing.interp(var, var_indexers, _method, **kwargs) - elif dtype_kind == "O": - # ds.reindex seems faster than missing.interp and - # supports objects but inside this loop there might be - # some duplicate code that slows it down, therefore add - # these signals together and run it later: - reindex[name] = var + elif dtype_kind == "ObU": + # For types that we do not understand do stepwise + # interpolation to avoid modifying the elements. + # Use reindex_variables instead because it supports + # booleans and objects and retains the dtype but inside + # this loop there might be some duplicate code that slows it + # down, therefore add these signals together and run it later: + to_reindex[name] = var elif all(d not in indexers for d in var.dims): # For anything else we can only keep variables if they # are not dependent on any coords that are being @@ -2957,9 +2952,9 @@ def _validate_interp_indexer(x, new_x): # TODO: Where should this be? # Reindex variables: - if len(reindex) > 0: + if len(to_reindex) > 0: variables_reindex = alignment.reindex_variables( - variables=reindex, + variables=to_reindex, sizes=obj.sizes, indexes=obj.indexes, indexers={k: v[-1] for k, v in validated_indexers.items()}, From b2d8350c61d7a2a7dc0fa63004b6e50a2c81f529 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 19:52:23 +0100 Subject: [PATCH 07/26] Update dataset.py --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 05c34c761c9..627ba434d5b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2920,7 +2920,7 @@ def _validate_interp_indexer(x, new_x): if dtype_kind in "uifc": # For normal number types do the interpolation: var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} - variables[name] = missing.interp(var, var_indexers, _method, **kwargs) + variables[name] = missing.interp(var, var_indexers, method, **kwargs) elif dtype_kind == "ObU": # For types that we do not understand do stepwise # interpolation to avoid modifying the elements. From 9ae2030868c72fd45c280c91960bdd18d71ed3ac Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 21:08:45 +0100 Subject: [PATCH 08/26] add test --- xarray/tests/test_dataset.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 13cd03acf96..dbbc066b860 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4010,6 +4010,19 @@ def func(arg1, arg2, arg3=0.0): actual = ds.resample(time="D").map(func, args=(1.0,), arg3=1.0) assert_identical(expected, actual) + @requires_scipy + def test_ds_interp(self): + data_vars = dict( + a=("time", np.array([1, 1.25, 2])), + b=("time", np.array([True, True, False], dtype=bool)), + c=("time", np.array(["start", "start", "end"], dtype=str)) + ) + time = np.array([0, 0.25, 1], dtype=float) + ds = Dataset(data_vars, coords=dict(time=time)) + ds1 = Dataset({k: (dim, arr[[0, -1]]) for k, (dim, arr) in data_vars.items()}, coords=dict(time=time[[0, -1]])) + ds2 = ds.interp(time=time, method="linear") + assert_identical(ds, ds2) + def test_to_array(self): ds = Dataset( {"a": 1, "b": ("x", [1, 2, 3])}, From c202d621599aa759ead303c2f212a573372e64f6 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 21:14:46 +0100 Subject: [PATCH 09/26] Update test_dataset.py --- xarray/tests/test_dataset.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index dbbc066b860..ceeddf2a972 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4015,13 +4015,16 @@ def test_ds_interp(self): data_vars = dict( a=("time", np.array([1, 1.25, 2])), b=("time", np.array([True, True, False], dtype=bool)), - c=("time", np.array(["start", "start", "end"], dtype=str)) + c=("time", np.array(["start", "start", "end"], dtype=str)), ) time = np.array([0, 0.25, 1], dtype=float) - ds = Dataset(data_vars, coords=dict(time=time)) - ds1 = Dataset({k: (dim, arr[[0, -1]]) for k, (dim, arr) in data_vars.items()}, coords=dict(time=time[[0, -1]])) - ds2 = ds.interp(time=time, method="linear") - assert_identical(ds, ds2) + expected = Dataset(data_vars, coords=dict(time=time)) + actual = Dataset( + {k: (dim, arr[[0, -1]]) for k, (dim, arr) in data_vars.items()}, + coords=dict(time=time[[0, -1]]), + ) + actual = actual.interp(time=time, method="linear") + assert_identical(expected, actual) def test_to_array(self): ds = Dataset( From bdc1a6540ec7837b03ec28a06ca3bd58767b1cee Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 21:44:04 +0100 Subject: [PATCH 10/26] Update test_dataset.py --- xarray/tests/test_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index ceeddf2a972..735bf22c4b3 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4018,8 +4018,8 @@ def test_ds_interp(self): c=("time", np.array(["start", "start", "end"], dtype=str)), ) time = np.array([0, 0.25, 1], dtype=float) - expected = Dataset(data_vars, coords=dict(time=time)) - actual = Dataset( + expected = xr.Dataset(data_vars, coords=dict(time=time)) + actual = xr.Dataset( {k: (dim, arr[[0, -1]]) for k, (dim, arr) in data_vars.items()}, coords=dict(time=time[[0, -1]]), ) From 12a721f016c2fb7cafebe4177edd8f1b9b7fa372 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 22:10:51 +0100 Subject: [PATCH 11/26] Update dataset.py --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 627ba434d5b..42faca9f4a9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2921,7 +2921,7 @@ def _validate_interp_indexer(x, new_x): # For normal number types do the interpolation: var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} variables[name] = missing.interp(var, var_indexers, method, **kwargs) - elif dtype_kind == "ObU": + elif dtype_kind in "ObU": # For types that we do not understand do stepwise # interpolation to avoid modifying the elements. # Use reindex_variables instead because it supports From b449bc95523443eeb44530735cd6e8707f2808b9 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 7 Mar 2021 23:58:51 +0100 Subject: [PATCH 12/26] move reindex up a bit --- xarray/core/dataset.py | 11 +++++++++++ xarray/tests/test_dataset.py | 16 ---------------- xarray/tests/test_interp.py | 22 +++++++++++++--------- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 42faca9f4a9..709da32b964 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2935,6 +2935,17 @@ def _validate_interp_indexer(x, new_x): # interpolated along: variables[name] = var + if len(to_reindex) > 0: + # Reindex variables: + variables_reindex = alignment.reindex_variables( + variables=to_reindex, + sizes=obj.sizes, + indexes=obj.indexes, + indexers={k: v[-1] for k, v in validated_indexers.items()}, + method="nearest", + )[0] + variables.update(variables_reindex) + # Get the coords that also exist in the variables: coord_names = obj._coord_names & variables.keys() # Get the indexes that are not being interpolated along: diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 735bf22c4b3..13cd03acf96 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4010,22 +4010,6 @@ def func(arg1, arg2, arg3=0.0): actual = ds.resample(time="D").map(func, args=(1.0,), arg3=1.0) assert_identical(expected, actual) - @requires_scipy - def test_ds_interp(self): - data_vars = dict( - a=("time", np.array([1, 1.25, 2])), - b=("time", np.array([True, True, False], dtype=bool)), - c=("time", np.array(["start", "start", "end"], dtype=str)), - ) - time = np.array([0, 0.25, 1], dtype=float) - expected = xr.Dataset(data_vars, coords=dict(time=time)) - actual = xr.Dataset( - {k: (dim, arr[[0, -1]]) for k, (dim, arr) in data_vars.items()}, - coords=dict(time=time[[0, -1]]), - ) - actual = actual.interp(time=time, method="linear") - assert_identical(expected, actual) - def test_to_array(self): ds = Dataset( {"a": 1, "b": ("x", [1, 2, 3])}, diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index 9212f870009..ab81f71f6ae 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -416,15 +416,19 @@ def test_errors(use_dask): @requires_scipy def test_dtype(): - ds = xr.Dataset( - {"var1": ("x", [0, 1, 2]), "var2": ("x", ["a", "b", "c"])}, - coords={"x": [0.1, 0.2, 0.3], "z": ("x", ["a", "b", "c"])}, - ) - actual = ds.interp(x=[0.15, 0.25]) - assert "var1" in actual - assert "var2" not in actual - # object array should be dropped - assert "z" not in actual.coords + data_vars = dict( + a=("time", np.array([1, 1.25, 2])), + b=("time", np.array([True, True, False], dtype=bool)), + c=("time", np.array(["start", "start", "end"], dtype=str)), + ) + time = np.array([0, 0.25, 1], dtype=float) + expected = xr.Dataset(data_vars, coords=dict(time=time)) + actual = xr.Dataset( + {k: (dim, arr[[0, -1]]) for k, (dim, arr) in data_vars.items()}, + coords=dict(time=time[[0, -1]]), + ) + actual = actual.interp(time=time, method="linear") + assert_identical(expected, actual) @requires_scipy From dd678c821e7512c1c5ce4061e8856245c9074d97 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 8 Mar 2021 07:26:52 +0100 Subject: [PATCH 13/26] Update dataset.py --- xarray/core/dataset.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 709da32b964..7ae60e0ce77 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2961,18 +2961,6 @@ def _validate_interp_indexer(x, new_x): if v.dims == (k,): indexes[k] = v.to_index() - # TODO: Where should this be? - # Reindex variables: - if len(to_reindex) > 0: - variables_reindex = alignment.reindex_variables( - variables=to_reindex, - sizes=obj.sizes, - indexes=obj.indexes, - indexers={k: v[-1] for k, v in validated_indexers.items()}, - method="nearest", - )[0] - variables.update(variables_reindex) - # Extract coordinates from indexers coord_vars, new_indexes = selected._get_indexers_coords_and_indexes(coords) variables.update(coord_vars) From 7fdb44bbdfd8123ccfb0ecde5856175519883c50 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 9 Mar 2021 20:49:27 +0100 Subject: [PATCH 14/26] dont reindex if var has no dims --- xarray/core/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 7ae60e0ce77..a7f304a0fda 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2928,7 +2928,8 @@ def _validate_interp_indexer(x, new_x): # booleans and objects and retains the dtype but inside # this loop there might be some duplicate code that slows it # down, therefore add these signals together and run it later: - to_reindex[name] = var + if use_indexers.keys() & var.dims: + to_reindex[name] = var elif all(d not in indexers for d in var.dims): # For anything else we can only keep variables if they # are not dependent on any coords that are being From 7faa4a026e1352cf0bee9a2f602856551e00d023 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 9 Mar 2021 21:22:39 +0100 Subject: [PATCH 15/26] Update dataset.py --- xarray/core/dataset.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a7f304a0fda..6d872c4ba93 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2921,15 +2921,14 @@ def _validate_interp_indexer(x, new_x): # For normal number types do the interpolation: var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} variables[name] = missing.interp(var, var_indexers, method, **kwargs) - elif dtype_kind in "ObU": + elif dtype_kind in "ObU" and (use_indexers.keys() & var.dims): # For types that we do not understand do stepwise # interpolation to avoid modifying the elements. # Use reindex_variables instead because it supports # booleans and objects and retains the dtype but inside # this loop there might be some duplicate code that slows it # down, therefore add these signals together and run it later: - if use_indexers.keys() & var.dims: - to_reindex[name] = var + to_reindex[name] = var elif all(d not in indexers for d in var.dims): # For anything else we can only keep variables if they # are not dependent on any coords that are being From c4b7927a523ef588a94a1814d3773a3e4dc4d601 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 11 Mar 2021 07:10:49 +0100 Subject: [PATCH 16/26] Update dataset.py --- xarray/core/dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6d872c4ba93..9caa9f9d726 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2927,7 +2927,7 @@ def _validate_interp_indexer(x, new_x): # Use reindex_variables instead because it supports # booleans and objects and retains the dtype but inside # this loop there might be some duplicate code that slows it - # down, therefore add these signals together and run it later: + # down, therefore collect these signals and run it later: to_reindex[name] = var elif all(d not in indexers for d in var.dims): # For anything else we can only keep variables if they @@ -2935,7 +2935,7 @@ def _validate_interp_indexer(x, new_x): # interpolated along: variables[name] = var - if len(to_reindex) > 0: + if to_reindex: # Reindex variables: variables_reindex = alignment.reindex_variables( variables=to_reindex, @@ -2954,7 +2954,7 @@ def _validate_interp_indexer(x, new_x): variables.copy(), coord_names, indexes=indexes ) - # attach indexer as coordinate + # Attach indexer as coordinate variables.update(indexers) for k, v in indexers.items(): assert isinstance(v, Variable) From 0161669e19723244d8b42c39e25bc3e2b6f60729 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 13 Mar 2021 10:42:04 +0100 Subject: [PATCH 17/26] add parameter for for non-numerics --- xarray/core/dataset.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9caa9f9d726..097523e8802 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2723,6 +2723,7 @@ def interp( self, coords: Mapping[Hashable, Any] = None, method: str = "linear", + method_for_non_numerics: str = "nearest", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, **coords_kwargs: Any, @@ -2740,6 +2741,10 @@ def interp( {"linear", "nearest"} for multidimensional array, {"linear", "nearest", "zero", "slinear", "quadratic", "cubic"} for 1-dimensional array. "linear" is used by default. + method_for_non_numerics : str, optional + Method for non-numerics where modifying the elements is not + possible. See Dataset.reindex for options. "nearest" is used by + default. assume_sorted : bool, optional If False, values of coordinates that are interpolated over can be in any order and they are sorted first. If True, interpolated @@ -2942,7 +2947,7 @@ def _validate_interp_indexer(x, new_x): sizes=obj.sizes, indexes=obj.indexes, indexers={k: v[-1] for k, v in validated_indexers.items()}, - method="nearest", + method=method_for_non_numerics, )[0] variables.update(variables_reindex) From dc12fe40d88dc336579185cf7b38d5c16b9e8f4f Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 13 Mar 2021 10:57:21 +0100 Subject: [PATCH 18/26] Update dataset.py --- xarray/core/dataset.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 097523e8802..2d24712bb97 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2723,8 +2723,7 @@ def interp( self, coords: Mapping[Hashable, Any] = None, method: str = "linear", - method_for_non_numerics: str = "nearest", - assume_sorted: bool = False, +c assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, **coords_kwargs: Any, ) -> "Dataset": @@ -2741,10 +2740,6 @@ def interp( {"linear", "nearest"} for multidimensional array, {"linear", "nearest", "zero", "slinear", "quadratic", "cubic"} for 1-dimensional array. "linear" is used by default. - method_for_non_numerics : str, optional - Method for non-numerics where modifying the elements is not - possible. See Dataset.reindex for options. "nearest" is used by - default. assume_sorted : bool, optional If False, values of coordinates that are interpolated over can be in any order and they are sorted first. If True, interpolated @@ -2754,6 +2749,10 @@ def interp( Additional keyword arguments passed to scipy's interpolator. Valid options and their behavior depend on if 1-dimensional or multi-dimensional interpolation is used. + method_for_non_numerics : str, optional + Method for non-numerics where modifying the elements is not + possible. See Dataset.reindex for options. "nearest" is used by + default. **coords_kwargs : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. @@ -2980,6 +2979,7 @@ def interp_like( method: str = "linear", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, + method_for_non_numerics: str = "nearest", ) -> "Dataset": """Interpolate this object onto the coordinates of another object, filling the out of range values with NaN. @@ -3001,6 +3001,10 @@ def interp_like( values. kwargs : dict, optional Additional keyword passed to scipy's interpolator. + method_for_non_numerics : str, optional + Method for non-numerics where modifying the elements is not + possible. See Dataset.reindex for options. "nearest" is used by + default. Returns ------- @@ -3036,7 +3040,13 @@ def interp_like( # We do not support interpolation along object coordinate. # reindex instead. ds = self.reindex(object_coords) - return ds.interp(numeric_coords, method, assume_sorted, kwargs) + return ds.interp( + coords=numeric_coords, + method=method, + assume_sorted=assume_sorted, + kwargs=kwargs + method_for_non_numerics=method_for_non_numerics, + ) # Helper methods for rename() def _rename_vars(self, name_dict, dims_dict): From b278b67541639064bafca466eb79e57e6f60fb9f Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 13 Mar 2021 11:05:09 +0100 Subject: [PATCH 19/26] Update dataset.py --- xarray/core/dataset.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2d24712bb97..4d8d57e7029 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2723,8 +2723,9 @@ def interp( self, coords: Mapping[Hashable, Any] = None, method: str = "linear", -c assume_sorted: bool = False, + assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, + method_for_non_numerics: str = "nearest", **coords_kwargs: Any, ) -> "Dataset": """Multidimensional interpolation of Dataset. @@ -3044,7 +3045,7 @@ def interp_like( coords=numeric_coords, method=method, assume_sorted=assume_sorted, - kwargs=kwargs + kwargs=kwargs, method_for_non_numerics=method_for_non_numerics, ) From 87afce66e26d253bf4cc5eb4f69fa6a48e8e1848 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 13 Mar 2021 11:38:03 +0100 Subject: [PATCH 20/26] test if order matters --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4d8d57e7029..40339c949f3 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2723,9 +2723,9 @@ def interp( self, coords: Mapping[Hashable, Any] = None, method: str = "linear", + method_for_non_numerics: str = "nearest", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, - method_for_non_numerics: str = "nearest", **coords_kwargs: Any, ) -> "Dataset": """Multidimensional interpolation of Dataset. From 4528fe1dcdd962ab446cd4d30aab73305edc9301 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 13 Mar 2021 12:17:30 +0100 Subject: [PATCH 21/26] move method_for_numerics to end xarray is not sensitive to parameter order anymore, but move the parameter to the end anyway to stay backwards compatible. --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 40339c949f3..4d8d57e7029 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2723,9 +2723,9 @@ def interp( self, coords: Mapping[Hashable, Any] = None, method: str = "linear", - method_for_non_numerics: str = "nearest", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, + method_for_non_numerics: str = "nearest", **coords_kwargs: Any, ) -> "Dataset": """Multidimensional interpolation of Dataset. From e15d757f30bfc48a23ce951e0036f839b03f2673 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 13 May 2021 09:49:37 +0200 Subject: [PATCH 22/26] Update xarray/core/dataset.py Co-authored-by: Deepak Cherian --- xarray/core/dataset.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index da3e6c76ab0..2a1168e3c0f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3109,10 +3109,9 @@ def interp_like( values. kwargs : dict, optional Additional keyword passed to scipy's interpolator. - method_for_non_numerics : str, optional - Method for non-numerics where modifying the elements is not - possible. See Dataset.reindex for options. "nearest" is used by - default. + method_non_numeric : {"nearest", "pad", "ffill", "backfill", "bfill"}, optional + Method for non-numeric types. Passed on to :py:meth:`Dataset.reindex`. + ``"nearest"`` is used by default. Returns ------- From 57ce43f8a75e382cdb5942beafcea6441b6443e2 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 13 May 2021 09:53:47 +0200 Subject: [PATCH 23/26] Update xarray/core/dataset.py Co-authored-by: Deepak Cherian --- xarray/core/dataset.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6414da81a5c..66b68340182 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2878,10 +2878,9 @@ def interp( Additional keyword arguments passed to scipy's interpolator. Valid options and their behavior depend on if 1-dimensional or multi-dimensional interpolation is used. - method_for_non_numerics : str, optional - Method for non-numerics where modifying the elements is not - possible. See Dataset.reindex for options. "nearest" is used by - default. + method_non_numeric : {"nearest", "pad", "ffill", "backfill", "bfill"}, optional + Method for non-numeric types. Passed on to :py:meth:`Dataset.reindex`. + ``"nearest"`` is used by default. **coords_kwargs : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. From 992f51d31a477bc6fc7d6f657884c2452bd4f2ab Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 13 May 2021 09:55:56 +0200 Subject: [PATCH 24/26] method_non_numeric --- xarray/core/dataset.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 66b68340182..0a7dbd3dd77 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2853,7 +2853,7 @@ def interp( method: str = "linear", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, - method_for_non_numerics: str = "nearest", + method_non_numeric: str = "nearest", **coords_kwargs: Any, ) -> "Dataset": """Multidimensional interpolation of Dataset. @@ -2879,7 +2879,7 @@ def interp( options and their behavior depend on if 1-dimensional or multi-dimensional interpolation is used. method_non_numeric : {"nearest", "pad", "ffill", "backfill", "bfill"}, optional - Method for non-numeric types. Passed on to :py:meth:`Dataset.reindex`. + Method for non-numeric types. Passed on to :py:meth:`Dataset.reindex`. ``"nearest"`` is used by default. **coords_kwargs : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. @@ -3074,7 +3074,7 @@ def _validate_interp_indexer(x, new_x): sizes=obj.sizes, indexes=obj.indexes, indexers={k: v[-1] for k, v in validated_indexers.items()}, - method=method_for_non_numerics, + method=method_non_numeric, )[0] variables.update(variables_reindex) @@ -3107,7 +3107,7 @@ def interp_like( method: str = "linear", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, - method_for_non_numerics: str = "nearest", + method_non_numeric: str = "nearest", ) -> "Dataset": """Interpolate this object onto the coordinates of another object, filling the out of range values with NaN. @@ -3130,7 +3130,7 @@ def interp_like( kwargs : dict, optional Additional keyword passed to scipy's interpolator. method_non_numeric : {"nearest", "pad", "ffill", "backfill", "bfill"}, optional - Method for non-numeric types. Passed on to :py:meth:`Dataset.reindex`. + Method for non-numeric types. Passed on to :py:meth:`Dataset.reindex`. ``"nearest"`` is used by default. Returns @@ -3172,7 +3172,7 @@ def interp_like( method=method, assume_sorted=assume_sorted, kwargs=kwargs, - method_for_non_numerics=method_for_non_numerics, + method_non_numeric=method_non_numeric, ) # Helper methods for rename() From 274c89131884716aab6a9ad4965e08dd084d1b94 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 13 May 2021 10:19:18 +0200 Subject: [PATCH 25/26] xindexes --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0a7dbd3dd77..67d6e467b9e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3081,7 +3081,7 @@ def _validate_interp_indexer(x, new_x): # Get the coords that also exist in the variables: coord_names = obj._coord_names & variables.keys() # Get the indexes that are not being interpolated along: - indexes = {k: v for k, v in obj.indexes.items() if k not in indexers} + indexes = {k: v for k, v in obj.xindexes.items() if k not in indexers} selected = self._replace_with_new_dims( variables.copy(), coord_names, indexes=indexes ) From d3f50414fb53ab7404a4c999a0aa84ce4f2a05db Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 13 May 2021 10:26:08 +0200 Subject: [PATCH 26/26] xindexes --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 67d6e467b9e..19af7f6c3cd 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3072,7 +3072,7 @@ def _validate_interp_indexer(x, new_x): variables_reindex = alignment.reindex_variables( variables=to_reindex, sizes=obj.sizes, - indexes=obj.indexes, + indexes=obj.xindexes, indexers={k: v[-1] for k, v in validated_indexers.items()}, method=method_non_numeric, )[0]