Numpy 1.18 support #3537

Merged
7 commits, merged on Nov 19, 2019
2 changes: 1 addition & 1 deletion ci/azure/install.yml
@@ -16,9 +16,9 @@ steps:
       --pre \
       --upgrade \
       matplotlib \
+      numpy \
       pandas \
       scipy
-    # numpy \  # FIXME https://github.com/pydata/xarray/issues/3409
     pip install \
       --no-deps \
       --upgrade \
2 changes: 1 addition & 1 deletion ci/requirements/py36.yml
@@ -25,7 +25,7 @@ dependencies:
   - nc-time-axis
   - netcdf4
   - numba
-  - numpy<1.18  # FIXME https://github.com/pydata/xarray/issues/3409
+  - numpy
   - pandas
   - pint
   - pip
2 changes: 1 addition & 1 deletion ci/requirements/py37.yml
@@ -25,7 +25,7 @@ dependencies:
   - nc-time-axis
   - netcdf4
   - numba
-  - numpy<1.18  # FIXME https://github.com/pydata/xarray/issues/3409
+  - numpy
   - pandas
   - pint
   - pip
7 changes: 6 additions & 1 deletion doc/whats-new.rst
@@ -115,6 +115,12 @@ Bug fixes
   (:issue:`3402`). By `Deepak Cherian <https://github.com/dcherian/>`_
 - Allow appending datetime and bool data variables to zarr stores.
   (:issue:`3480`). By `Akihiro Matsukawa <https://github.com/amatsukawa/>`_.
+- Add support for numpy >=1.18; fix mean() on datetime64 arrays on the dask backend
+  (:issue:`3409`, :pull:`3537`). By `Guido Imperiale <https://github.com/crusaderky>`_.
+- Add support for pandas >=0.26 (:issue:`3440`).
+  By `Deepak Cherian <https://github.com/dcherian>`_.
+- Add support for pseudonetcdf >=3.1 (:pull:`3485`).
+  By `Barron Henderson <https://github.com/barronh>`_.
 
 Documentation
 ~~~~~~~~~~~~~
@@ -133,7 +139,6 @@ Documentation
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
-
 - Added integration tests against `pint <https://pint.readthedocs.io/>`_.
   (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`)
   by `Justus Magin <https://github.com/keewis>`_.
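A quick illustration of the mean()-on-datetime64 fix described in the first new entry above, using only public xarray API (a sketch; the sample timestamps mirror the new test added further down, not anything in the changelog itself):

import numpy as np
import xarray as xr

# datetime64 data with a missing value (NaT)
da = xr.DataArray(
    np.array(["2010-01-01", "NaT", "2010-01-03"], dtype="M8[ns]"), dims="time"
)

print(da.mean().values)              # 2010-01-02: NaT is skipped by default
print(da.mean(skipna=False).values)  # NaT: missing values propagate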
4 changes: 3 additions & 1 deletion xarray/core/dataset.py
@@ -5316,7 +5316,9 @@ def _integrate_one(self, coord, datetime_unit=None):
             datetime_unit, _ = np.datetime_data(coord_var.dtype)
         elif datetime_unit is None:
             datetime_unit = "s"  # Default to seconds for cftime objects
-        coord_var = datetime_to_numeric(coord_var, datetime_unit=datetime_unit)
+        coord_var = coord_var._replace(
+            data=datetime_to_numeric(coord_var.data, datetime_unit=datetime_unit)
+        )
 
         variables = {}
         coord_names = set()
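The change above stops passing a whole Variable into datetime_to_numeric and instead converts the raw .data, rebuilding the Variable via _replace. A minimal standalone sketch of that pattern (it relies on xarray internals, namely xarray.core.duck_array_ops and the private Variable._replace, and the sample values are made up):

import numpy as np
import xarray as xr
from xarray.core.duck_array_ops import datetime_to_numeric

# A hypothetical datetime coordinate.
time = xr.Variable(
    "time", np.array(["2019-01-01", "2019-01-02", "2019-01-04"], dtype="M8[ns]")
)

# Convert the underlying array to float seconds since its minimum,
# then wrap the result back into a Variable, as _integrate_one now does.
numeric = time._replace(data=datetime_to_numeric(time.data, datetime_unit="s"))
print(numeric.values)  # [0., 86400., 259200.]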
28 changes: 26 additions & 2 deletions xarray/core/duck_array_ops.py
@@ -351,6 +351,26 @@ def f(values, axis=None, skipna=None, **kwargs):
 _mean = _create_nan_agg_method("mean")
 
 
+def _datetime_nanmin(array):
+    """nanmin() function for datetime64.
+
+    Caveats that this function deals with:
+
+    - In numpy < 1.18, min() on datetime64 incorrectly ignores NaT
+    - numpy nanmin() doesn't work on datetime64 (all versions at the moment of writing)
+    - dask min() does not work on datetime64 (all versions at the moment of writing)
+    """
+    assert array.dtype.kind in "mM"
+    dtype = array.dtype
+    # (NaT).astype(float) does not produce NaN...
+    array = where(pandas_isnull(array), np.nan, array.astype(float))
+    array = min(array, skipna=True)
+    if isinstance(array, float):
+        array = np.array(array)
+    # ...but (NaN).astype("M8") does produce NaT
+    return array.astype(dtype)
+
+
 def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
     """Convert an array containing datetime-like data to an array of floats.
 
@@ -370,7 +390,10 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
     """
     # TODO: make this function dask-compatible?
     if offset is None:
-        offset = array.min()
+        if array.dtype.kind in "Mm":
+            offset = _datetime_nanmin(array)
+        else:
+            offset = min(array)
     array = array - offset
 
     if not hasattr(array, "dtype"):  # scalar is converted to 0d-array
@@ -401,7 +424,8 @@ def mean(array, axis=None, skipna=None, **kwargs):
 
     array = asarray(array)
     if array.dtype.kind in "Mm":
-        offset = min(array)
+        offset = _datetime_nanmin(array)
+
         # xarray always uses np.datetime64[ns] for np.datetime64 data
         dtype = "timedelta64[ns]"
         return (
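The new helper leans on the module's own where/min wrappers so that it also works through dask. A rough standalone sketch of the same NaT-skipping trick in plain numpy/pandas (illustrative only, not the library's implementation) might read:

import numpy as np
import pandas as pd

def nat_skipping_min(array):
    # Minimum of a datetime64/timedelta64 array that ignores NaT.
    assert array.dtype.kind in "mM"
    dtype = array.dtype
    # NaT.astype(float) does not yield NaN, so mask NaT explicitly first,
    # then take nanmin on the float view (nanmin itself is fine on floats)...
    as_float = np.where(pd.isnull(array), np.nan, array.astype(float))
    result = np.nanmin(as_float)
    # ...while NaN does cast back to NaT, so an all-NaT input still comes out as NaT.
    return np.array(result).astype(dtype)

times = np.array(["2010-01-01", "NaT", "2010-01-03"], dtype="M8[ns]")
print(nat_skipping_min(times))  # 2010-01-01, not NaT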
4 changes: 3 additions & 1 deletion xarray/tests/test_dataset.py
@@ -5874,7 +5874,9 @@ def test_trapz_datetime(dask, which_datetime):
 
     actual = da.integrate("time", datetime_unit="D")
     expected_data = np.trapz(
-        da, duck_array_ops.datetime_to_numeric(da["time"], datetime_unit="D"), axis=0
+        da.data,
+        duck_array_ops.datetime_to_numeric(da["time"].data, datetime_unit="D"),
+        axis=0,
    )
     expected = xr.DataArray(
         expected_data,
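For reference, the updated assertion compares the public integrate() result against numpy's trapezoid rule applied to the raw arrays. A self-contained version of that comparison (the sample data is invented; duck_array_ops is xarray-internal, imported here the same way the test module does):

import numpy as np
import xarray as xr
from xarray.core import duck_array_ops

time = np.array(["2019-01-01", "2019-01-02", "2019-01-04"], dtype="M8[ns]")
da = xr.DataArray([1.0, 2.0, 4.0], dims="time", coords={"time": time})

actual = da.integrate("time", datetime_unit="D")
expected = np.trapz(
    da.data,
    duck_array_ops.datetime_to_numeric(da["time"].data, datetime_unit="D"),
    axis=0,
)
np.testing.assert_allclose(actual.values, expected)  # both evaluate to 7.5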
50 changes: 33 additions & 17 deletions xarray/tests/test_duck_array_ops.py
@@ -274,23 +274,39 @@ def assert_dask_array(da, dask):
 
 
 @arm_xfail
-@pytest.mark.parametrize("dask", [False, True])
-def test_datetime_reduce(dask):
-    time = np.array(pd.date_range("15/12/1999", periods=11))
-    time[8:11] = np.nan
-    da = DataArray(np.linspace(0, 365, num=11), dims="time", coords={"time": time})
-
-    if dask and has_dask:
-        chunks = {"time": 5}
-        da = da.chunk(chunks)
-
-    actual = da["time"].mean()
-    assert not pd.isnull(actual)
-    actual = da["time"].mean(skipna=False)
-    assert pd.isnull(actual)
-
-    # test for a 0d array
-    assert da["time"][0].mean() == da["time"][:1].mean()
+@pytest.mark.parametrize("dask", [False, True] if has_dask else [False])
+def test_datetime_mean(dask):
+    # Note: only testing numpy, as dask is broken upstream
+    da = DataArray(
+        np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="M8"),
+        dims=["time"],
+    )
+    if dask:
+        # Trigger use case where a chunk is full of NaT
+        da = da.chunk({"time": 3})
+
+    expect = DataArray(np.array("2010-01-02", dtype="M8"))
+    expect_nat = DataArray(np.array("NaT", dtype="M8"))
+
+    actual = da.mean()
+    if dask:
+        assert actual.chunks is not None
+    assert_equal(actual, expect)
+
+    actual = da.mean(skipna=False)
+    if dask:
+        assert actual.chunks is not None
+    assert_equal(actual, expect_nat)
+
+    # tests for 1d array full of NaT
+    assert_equal(da[[1]].mean(), expect_nat)
+    assert_equal(da[[1]].mean(skipna=False), expect_nat)
+
+    # tests for a 0d array
+    assert_equal(da[0].mean(), da[0])
+    assert_equal(da[0].mean(skipna=False), da[0])
+    assert_equal(da[1].mean(), expect_nat)
+    assert_equal(da[1].mean(skipna=False), expect_nat)
 
 
 @requires_cftime
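The dask parametrization above specifically exercises a chunk that contains only NaT: chunking the five values by 3 puts the two trailing NaT entries in their own chunk. A compact way to reproduce that scenario interactively, assuming dask is installed:

import numpy as np
import xarray as xr

# Five timestamps chunked as (3, 2); the second chunk is entirely NaT.
da = xr.DataArray(
    np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="M8[ns]"),
    dims="time",
).chunk({"time": 3})

print(da.mean().compute().values)              # 2010-01-02, despite the all-NaT chunk
print(da.mean(skipna=False).compute().values)  # NaT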