From 360e7271756b4129e0dcd22ed15755a5fa0b87d0 Mon Sep 17 00:00:00 2001 From: Justin Zheng Date: Sun, 28 Oct 2018 15:31:13 -0700 Subject: [PATCH] BUG GH23282 calling min on series of NaT returns NaT (#23289) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/nanops.py | 38 +++++++++++++-------- pandas/tests/series/test_datetime_values.py | 18 ++++++++++ 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 89acd1a14a412..cca6238d2e89a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1030,6 +1030,7 @@ Datetimelike - Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`) - Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`) - Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`) +- Bug in :meth:`Series.min` which would return ``NaN`` instead of ``NaT`` when called on a series of ``NaT`` (:issue:`23282`) - Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`) - Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`) - Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 2884bc1a19491..afba433f0e391 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -244,7 +244,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, elif is_float_dtype(dtype): dtype_max = np.float64 - return values, mask, dtype, dtype_max + return values, mask, dtype, dtype_max, fill_value def _isfinite(values): @@ -266,16 +266,21 @@ def _view_if_needed(values): return values -def _wrap_results(result, dtype): +def _wrap_results(result, dtype, fill_value=None): """ wrap our results if needed """ if is_datetime64_dtype(dtype): if not isinstance(result, np.ndarray): + assert not isna(fill_value), "Expected non-null fill_value" + if result == fill_value: + result = np.nan result = tslibs.Timestamp(result) else: result = result.view(dtype) elif is_timedelta64_dtype(dtype): if not isinstance(result, np.ndarray): + if result == fill_value: + result = np.nan # raise if we have a timedelta64[ns] which is too large if np.fabs(result) > _int64_max: @@ -346,8 +351,8 @@ def nanany(values, axis=None, skipna=True, mask=None): >>> nanops.nanany(s) False """ - values, mask, dtype, _ = _get_values(values, skipna, False, copy=skipna, - mask=mask) + values, mask, dtype, _, _ = _get_values(values, skipna, False, copy=skipna, + mask=mask) return values.any(axis) @@ -379,8 +384,8 @@ def nanall(values, axis=None, skipna=True, mask=None): >>> nanops.nanall(s) False """ - values, mask, dtype, _ = _get_values(values, skipna, True, copy=skipna, - mask=mask) + values, mask, dtype, _, _ = _get_values(values, skipna, True, copy=skipna, + mask=mask) return values.all(axis) @@ -409,7 +414,8 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None): >>> nanops.nansum(s) 3.0 """ - values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask) + values, mask, dtype, dtype_max, _ = _get_values(values, + skipna, 0, mask=mask) dtype_sum = dtype_max if is_float_dtype(dtype): dtype_sum = dtype @@ -448,7 +454,8 @@ def nanmean(values, axis=None, skipna=True, mask=None): >>> nanops.nanmean(s) 1.5 """ - values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask) + values, mask, dtype, dtype_max, _ = _get_values( + values, skipna, 0, mask=mask) dtype_sum = dtype_max dtype_count = np.float64 if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype): @@ -501,7 +508,7 @@ def get_median(x): return np.nan return np.nanmedian(x[mask]) - values, mask, dtype, dtype_max = _get_values(values, skipna, mask=mask) + values, mask, dtype, dtype_max, _ = _get_values(values, skipna, mask=mask) if not is_float_dtype(values): values = values.astype('f8') values[mask] = np.nan @@ -705,7 +712,8 @@ def nansem(values, axis=None, skipna=True, ddof=1, mask=None): def _nanminmax(meth, fill_value_typ): @bottleneck_switch() def reduction(values, axis=None, skipna=True, mask=None): - values, mask, dtype, dtype_max = _get_values( + + values, mask, dtype, dtype_max, fill_value = _get_values( values, skipna, fill_value_typ=fill_value_typ, mask=mask) if ((axis is not None and values.shape[axis] == 0) or @@ -719,7 +727,7 @@ def reduction(values, axis=None, skipna=True, mask=None): else: result = getattr(values, meth)(axis) - result = _wrap_results(result, dtype) + result = _wrap_results(result, dtype, fill_value) return _maybe_null_out(result, axis, mask) reduction.__name__ = 'nan' + meth @@ -753,8 +761,8 @@ def nanargmax(values, axis=None, skipna=True, mask=None): >>> nanops.nanargmax(s) 4 """ - values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf', - mask=mask) + values, mask, dtype, _, _ = _get_values( + values, skipna, fill_value_typ='-inf', mask=mask) result = values.argmax(axis) result = _maybe_arg_null_out(result, axis, mask, skipna) return result @@ -783,8 +791,8 @@ def nanargmin(values, axis=None, skipna=True, mask=None): >>> nanops.nanargmin(s) 0 """ - values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf', - mask=mask) + values, mask, dtype, _, _ = _get_values( + values, skipna, fill_value_typ='+inf', mask=mask) result = values.argmin(axis) result = _maybe_arg_null_out(result, axis, mask, skipna) return result diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 4825ecbe51584..618745eb25a5a 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -508,3 +508,21 @@ def test_dt_timetz_accessor(self, tz_naive_fixture): time(22, 14, tzinfo=tz)]) result = s.dt.timetz tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('nat', [ + pd.Series([pd.NaT, pd.NaT]), + pd.Series([pd.NaT, pd.Timedelta('nat')]), + pd.Series([pd.Timedelta('nat'), pd.Timedelta('nat')])]) + def test_minmax_nat_series(self, nat): + # GH 23282 + assert nat.min() is pd.NaT + assert nat.max() is pd.NaT + + @pytest.mark.parametrize('nat', [ + # GH 23282 + pd.DataFrame([pd.NaT, pd.NaT]), + pd.DataFrame([pd.NaT, pd.Timedelta('nat')]), + pd.DataFrame([pd.Timedelta('nat'), pd.Timedelta('nat')])]) + def test_minmax_nat_dataframe(self, nat): + assert nat.min()[0] is pd.NaT + assert nat.max()[0] is pd.NaT