From e5a9b60c6053fbfef449efce96f694cd04826113 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 20 May 2022 09:59:41 -0600 Subject: [PATCH] Squashed commit of the following: commit 398f1b660233402fdf7e631f0ee5f863cae564a2 Author: dcherian Date: Fri May 20 08:47:56 2022 -0600 Backward compatibility dask commit bde40e471ed7f9e5ba3a9b4066528195fb467931 Merge: 0783df364 4cae8d0ec Author: dcherian Date: Fri May 20 07:54:48 2022 -0600 Merge branch 'main' into dask-datetime-to-numeric * main: concatenate docs style (#6621) Typing for open_dataset/array/mfdataset and to_netcdf/zarr (#6612) {full,zeros,ones}_like typing (#6611) commit 0783df364716b934ad4ab0ac23570fe3e8f7b942 Merge: 5cff4f1bd 8de706151 Author: dcherian Date: Sun May 15 21:03:50 2022 -0600 Merge branch 'main' into dask-datetime-to-numeric * main: (24 commits) Fix overflow issue in decode_cf_datetime for dtypes <= np.uint32 (#6598) Enable flox in GroupBy and resample (#5734) Add setuptools as dependency in ASV benchmark CI (#6609) change polyval dim ordering (#6601) re-add timedelta support for polyval (#6599) Minor Dataset.map docstr clarification (#6595) New inline_array kwarg for open_dataset (#6566) Fix polyval overloads (#6593) Restore old MultiIndex dropping behaviour (#6592) [docs] add Dataset.assign_coords example (#6336) (#6558) Fix zarr append dtype checks (#6476) Add missing space in exception message (#6590) Doc Link to accessors list in extending-xarray.rst (#6587) Fix Dataset/DataArray.isel with drop=True and scalar DataArray indexes (#6579) Add some warnings about rechunking to the docs (#6569) [pre-commit.ci] pre-commit autoupdate (#6584) terminology.rst: fix link to Unidata's "netcdf_dataset_components" (#6583) Allow string formatting of scalar DataArrays (#5981) Fix mypy issues & reenable in tests (#6581) polyval: Use Horner's algorithm + support chunked inputs (#6548) ... commit 5cff4f1bd40cd107c76331c0bd54708ae538f2b0 Merge: dfe200d15 6144c61a7 Author: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun May 1 15:16:33 2022 -0700 Merge branch 'main' into dask-datetime-to-numeric commit dfe200d1502a62ca105b301053a4254c9286f20e Author: dcherian Date: Sun May 1 11:04:03 2022 -0600 Minor cleanup commit 35ed378f201ecf7c8cd80a7a6180583296781ed7 Author: dcherian Date: Sun May 1 10:57:36 2022 -0600 Support dask arrays in datetime_to_numeric --- xarray/core/duck_array_ops.py | 22 +++++++++++-- xarray/tests/test_duck_array_ops.py | 49 +++++++++++++++++++++++------ 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 253a68b7205..033c238b959 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -431,7 +431,14 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): # Compute timedelta object. # For np.datetime64, this can silently yield garbage due to overflow. # One option is to enforce 1970-01-01 as the universal offset. - array = array - offset + + # This map_blocks call is for backwards compatibility. + # dask == 2021.04.1 does not support subtracting object arrays + # which is required for cftime + if is_duck_dask_array(array): + array = array.map_blocks(lambda a, b: a - b, offset) + else: + array = array - offset # Scalar is converted to 0d-array if not hasattr(array, "dtype"): @@ -517,10 +524,19 @@ def pd_timedelta_to_float(value, datetime_unit): return np_timedelta64_to_float(value, datetime_unit) +def _timedelta_to_seconds(array): + return np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 + + def py_timedelta_to_float(array, datetime_unit): """Convert a timedelta object to a float, possibly at a loss of resolution.""" - array = np.asarray(array) - array = np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 + array = asarray(array) + if is_duck_dask_array(array): + array = array.map_blocks( + _timedelta_to_seconds, meta=np.array([], dtype=np.float64) + ) + else: + array = _timedelta_to_seconds(array) conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) return conversion_factor * array diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index c329bc50c56..392f1b91914 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -675,39 +675,68 @@ def test_multiple_dims(dtype, dask, skipna, func): assert_allclose(actual, expected) -def test_datetime_to_numeric_datetime64(): +@pytest.mark.parametrize("dask", [True, False]) +def test_datetime_to_numeric_datetime64(dask): + if dask and not has_dask: + pytest.skip("requires dask") + times = pd.date_range("2000", periods=5, freq="7D").values - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") + if dask: + import dask.array + + times = dask.array.from_array(times, chunks=-1) + + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") expected = 24 * np.arange(0, 35, 7) np.testing.assert_array_equal(result, expected) offset = times[1] - result = duck_array_ops.datetime_to_numeric(times, offset=offset, datetime_unit="h") + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit="h" + ) expected = 24 * np.arange(-7, 28, 7) np.testing.assert_array_equal(result, expected) dtype = np.float32 - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, datetime_unit="h", dtype=dtype + ) expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected) @requires_cftime -def test_datetime_to_numeric_cftime(): +@pytest.mark.parametrize("dask", [True, False]) +def test_datetime_to_numeric_cftime(dask): + if dask and not has_dask: + pytest.skip("requires dask") + times = cftime_range("2000", periods=5, freq="7D", calendar="standard").values - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int) + if dask: + import dask.array + + times = dask.array.from_array(times, chunks=-1) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int) expected = 24 * np.arange(0, 35, 7) np.testing.assert_array_equal(result, expected) offset = times[1] - result = duck_array_ops.datetime_to_numeric( - times, offset=offset, datetime_unit="h", dtype=int - ) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit="h", dtype=int + ) expected = 24 * np.arange(-7, 28, 7) np.testing.assert_array_equal(result, expected) dtype = np.float32 - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, datetime_unit="h", dtype=dtype + ) expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected)