From 64d3909e2290b983382cebf8e7fee6e7349ac78e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 30 Dec 2020 05:23:24 -0800 Subject: [PATCH] Backport PR #38723: BUG: inconsistency between frame.any/all with dt64 vs dt64tz --- doc/source/whatsnew/v1.2.1.rst | 2 + pandas/core/arrays/datetimelike.py | 11 ++++++ pandas/tests/frame/test_reductions.py | 4 ++ pandas/tests/reductions/test_reductions.py | 43 ++++++++++++++++++++++ 4 files changed, 60 insertions(+) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 374ac737298d3..0bc01c683e0ad 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -18,6 +18,8 @@ Fixed regressions - :meth:`to_csv` created corrupted zip files when there were more rows than ``chunksize`` (issue:`38714`) - Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`) - Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`) +- Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`) +- .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index be9864731842d..2f2f8efc0c360 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1621,6 +1621,17 @@ def floor(self, freq, ambiguous="raise", nonexistent="raise"): def ceil(self, freq, ambiguous="raise", nonexistent="raise"): return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) + # -------------------------------------------------------------- + # Reductions + + def any(self, *, axis: Optional[int] = None, skipna: bool = True): + # GH#34479 discussion of desired behavior long-term + return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna()) + + def all(self, *, axis: Optional[int] = None, skipna: bool = True): + # GH#34479 discussion of desired behavior long-term + return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna()) + # -------------------------------------------------------------- # Frequency Methods diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index d33d91f2cefca..d843d4b0e9504 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1091,9 +1091,13 @@ def test_any_all_bool_only(self): (np.all, {"A": Series([0, 1], dtype=int)}, False), (np.any, {"A": Series([0, 1], dtype=int)}, True), pytest.param(np.all, {"A": Series([0, 1], dtype="M8[ns]")}, False), + pytest.param(np.all, {"A": Series([0, 1], dtype="M8[ns, UTC]")}, False), pytest.param(np.any, {"A": Series([0, 1], dtype="M8[ns]")}, True), + pytest.param(np.any, {"A": Series([0, 1], dtype="M8[ns, UTC]")}, True), pytest.param(np.all, {"A": Series([1, 2], dtype="M8[ns]")}, True), + pytest.param(np.all, {"A": Series([1, 2], dtype="M8[ns, UTC]")}, True), pytest.param(np.any, {"A": Series([1, 2], dtype="M8[ns]")}, True), + pytest.param(np.any, {"A": Series([1, 2], dtype="M8[ns, UTC]")}, True), pytest.param(np.all, {"A": Series([0, 1], dtype="m8[ns]")}, False), pytest.param(np.any, {"A": Series([0, 1], dtype="m8[ns]")}, True), pytest.param(np.all, {"A": Series([1, 2], dtype="m8[ns]")}, True), diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 8c2297699807d..94afa204db891 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -17,6 +17,7 @@ Timedelta, TimedeltaIndex, Timestamp, + date_range, isna, timedelta_range, to_timedelta, @@ -923,6 +924,48 @@ def test_any_axis1_bool_only(self): expected = Series([True, False]) tm.assert_series_equal(result, expected) + def test_any_all_datetimelike(self): + # GH#38723 these may not be the desired long-term behavior (GH#34479) + # but in the interim should be internally consistent + dta = date_range("1995-01-02", periods=3)._data + ser = Series(dta) + df = DataFrame(ser) + + assert dta.all() + assert dta.any() + + assert ser.all() + assert ser.any() + + assert df.any().all() + assert df.all().all() + + dta = dta.tz_localize("UTC") + ser = Series(dta) + df = DataFrame(ser) + + assert dta.all() + assert dta.any() + + assert ser.all() + assert ser.any() + + assert df.any().all() + assert df.all().all() + + tda = dta - dta[0] + ser = Series(tda) + df = DataFrame(ser) + + assert tda.any() + assert not tda.all() + + assert ser.any() + assert not ser.all() + + assert df.any().all() + assert not df.all().any() + def test_timedelta64_analytics(self): # index min/max