Skip to content

Commit

Permalink
implement+test mean for datetimelike EA/Index/Series (#24757)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Jun 10, 2019
1 parent 959e799 commit efc7f2f
Show file tree
Hide file tree
Showing 9 changed files with 181 additions and 1 deletion.
14 changes: 14 additions & 0 deletions doc/source/reference/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,13 @@ Conversion
DatetimeIndex.to_series
DatetimeIndex.to_frame

Methods
~~~~~~~
.. autosummary::
   :toctree: api/

   DatetimeIndex.mean

TimedeltaIndex
--------------
.. autosummary::
Expand Down Expand Up @@ -435,6 +442,13 @@ Conversion
TimedeltaIndex.ceil
TimedeltaIndex.to_frame

Methods
~~~~~~~
.. autosummary::
   :toctree: api/

   TimedeltaIndex.mean

.. currentmodule:: pandas

PeriodIndex
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ Other Enhancements
- :meth:`DataFrame.query` and :meth:`DataFrame.eval` now support quoting column names with backticks to refer to names with spaces (:issue:`6508`)
- :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`)
- :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`)
- :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`)
-

.. _whatsnew_0250.api_breaking:
Expand Down
50 changes: 49 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1382,7 +1382,7 @@ def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise',
def _reduce(self, name, axis=0, skipna=True, **kwargs):
    """
    Dispatch the reduction called ``name`` to the same-named method.

    Parameters
    ----------
    name : str
        Name of the reduction; it is looked up as an attribute on ``self``.
    axis : int, default 0
        Accepted for signature compatibility only; it is not forwarded.
    skipna : bool, default True
        Passed through to the resolved reduction.
    **kwargs
        Additional keyword arguments passed through to the reduction.

    Returns
    -------
    object
        Whatever the resolved reduction returns.
    """
    op = getattr(self, name, None)
    if op:
        # Not every reduction accepts ``axis`` (``mean`` only takes
        # ``skipna``), so forward ``skipna`` and the extra keywords only.
        return op(skipna=skipna, **kwargs)
    else:
        # No same-named method here: defer to the parent implementation.
        return super()._reduce(name, skipna, **kwargs)

Expand Down Expand Up @@ -1438,6 +1438,54 @@ def max(self, axis=None, skipna=True, *args, **kwargs):
# Don't have to worry about NA `result`, since no NA went in.
return self._box_func(result)

def mean(self, skipna=True):
    """
    Return the mean value of the Array.

    .. versionadded:: 0.25.0

    Parameters
    ----------
    skipna : bool, default True
        Whether to ignore any NaT elements.

    Returns
    -------
    scalar (Timestamp or Timedelta)
        ``NaT`` is returned when there is nothing to average, or when
        ``skipna`` is False and at least one element is missing.

    Raises
    ------
    TypeError
        If the array has a Period dtype.

    See Also
    --------
    numpy.ndarray.mean
    Series.mean : Return the mean value in a Series.

    Notes
    -----
    mean is only defined for Datetime and Timedelta dtypes, not for Period.
    """
    if is_period_dtype(self):
        # See discussion in GH#24757
        msg = ("mean is not implemented for {cls} since the meaning is "
               "ambiguous. An alternative is "
               "obj.to_timestamp(how='start').mean()")
        raise TypeError(msg.format(cls=type(self).__name__))

    na_mask = self.isna()
    if skipna:
        # Drop the missing entries and average what is left.
        valid = self[~na_mask]
    else:
        if na_mask.any():
            # A missing entry propagates when it may not be skipped.
            return NaT
        valid = self

    if len(valid) == 0:
        # short-circuit: the mean of no values is NaT
        return NaT

    # Average the underlying integer representation, then box the result
    # back into a scalar.
    as_i8 = valid.view('i8')
    averaged = nanops.nanmean(as_i8, skipna=skipna)
    # Don't have to worry about NA `averaged`, since no NA went in.
    return self._box_func(averaged)


# -------------------------------------------------------------------
# Shared Constructor Helpers
Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin):
_maybe_mask_results = ea_passthrough(
DatetimeLikeArrayMixin._maybe_mask_results)
__iter__ = ea_passthrough(DatetimeLikeArrayMixin.__iter__)
mean = ea_passthrough(DatetimeLikeArrayMixin.mean)

@property
def freq(self):
Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index, DatetimeDelegateMixin):
to_frame
month_name
day_name
mean
See Also
--------
Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ class TimedeltaIndex(DatetimeIndexOpsMixin, dtl.TimelikeOps, Int64Index,
floor
ceil
to_frame
mean
See Also
--------
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3729,6 +3729,10 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
elif is_datetime64_dtype(delegate):
# use DatetimeIndex implementation to handle skipna correctly
delegate = DatetimeIndex(delegate)
elif is_timedelta64_dtype(delegate) and hasattr(TimedeltaIndex, name):
# use TimedeltaIndex to handle skipna correctly
# TODO: remove hasattr check after TimedeltaIndex has `std` method
delegate = TimedeltaIndex(delegate)

# dispatch to numpy arrays
elif isinstance(delegate, np.ndarray):
Expand Down
41 changes: 41 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1205,6 +1205,47 @@ def test_mean_corner(self, float_frame, float_string_frame):
means = float_frame.mean(0)
assert means['bool'] == float_frame['bool'].values.mean()

def test_mean_datetimelike(self):
    # GH#24757: with numeric_only=True only the integer column is
    # reduced; by default the timedelta column is reduced as well, while
    # the datetime and period columns are left out of the result.
    columns = {
        'A': np.arange(3),
        'B': pd.date_range('2016-01-01', periods=3),
        'C': pd.timedelta_range('1D', periods=3),
        'D': pd.period_range('2016', periods=3, freq='A')
    }
    frame = pd.DataFrame(columns)

    numeric_means = frame.mean(numeric_only=True)
    numeric_expected = pd.Series({'A': 1.})
    tm.assert_series_equal(numeric_means, numeric_expected)

    default_means = frame.mean()
    # the middle row is the mean of the evenly spaced timedelta column
    default_expected = pd.Series({'A': 1., 'C': frame.loc[1, 'C']})
    tm.assert_series_equal(default_means, default_expected)

@pytest.mark.xfail(reason="casts to object-dtype and then tries to "
                   "add timestamps",
                   raises=TypeError, strict=True)
def test_mean_datetimelike_numeric_only_false(self):
    # Desired behaviour once numeric_only=False is supported: every
    # column reports its middle row as the mean.
    columns = {
        'A': np.arange(3),
        'B': pd.date_range('2016-01-01', periods=3),
        'C': pd.timedelta_range('1D', periods=3),
        'D': pd.period_range('2016', periods=3, freq='A')
    }
    frame = pd.DataFrame(columns)

    observed = frame.mean(numeric_only=False)

    wanted = {'A': 1}
    for label in ('B', 'C', 'D'):
        wanted[label] = frame.loc[1, label]
    tm.assert_series_equal(observed, pd.Series(wanted))

def test_stats_mixed_type(self, float_string_frame):
# don't blow up
float_string_frame.std(1)
Expand Down
69 changes: 69 additions & 0 deletions pandas/tests/reductions/test_stat_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,78 @@

import pandas as pd
from pandas import DataFrame, Series
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
import pandas.util.testing as tm


class TestDatetimeLikeStatReductions:
    """Tests for ``mean`` on datetime64, Period and timedelta64 data."""

    @pytest.mark.parametrize('box', [Series, pd.Index, DatetimeArray])
    def test_dt64_mean(self, tz_naive_fixture, box):
        tz = tz_naive_fixture

        dti = pd.date_range('2001-01-01', periods=11, tz=tz)
        # shuffle so that we are not just working with monotone-increasing
        dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6])
        dtarr = dti._data

        obj = box(dtarr)
        assert obj.mean() == pd.Timestamp('2001-01-06', tz=tz)
        assert obj.mean(skipna=False) == pd.Timestamp('2001-01-06', tz=tz)

        # after the shuffle dtarr[-2] is 2001-01-03; dropping it moves the
        # mean of the remaining ten days to day 6.3, i.e. 06 07:12:00
        dtarr[-2] = pd.NaT

        obj = box(dtarr)
        assert obj.mean() == pd.Timestamp('2001-01-06 07:12:00', tz=tz)
        assert obj.mean(skipna=False) is pd.NaT

    @pytest.mark.parametrize('box', [Series, pd.Index, PeriodArray])
    def test_period_mean(self, box):
        # GH#24757
        dti = pd.date_range('2001-01-01', periods=11)
        # shuffle so that we are not just working with monotone-increasing
        dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6])

        # use hourly frequency to avoid rounding errors in expected results
        # TODO: flesh this out with different frequencies
        parr = dti._data.to_period('H')
        obj = box(parr)
        with pytest.raises(TypeError, match="ambiguous"):
            obj.mean()
        with pytest.raises(TypeError, match="ambiguous"):
            obj.mean(skipna=True)
        with pytest.raises(TypeError, match="ambiguous"):
            # the period check happens before skipna is consulted
            obj.mean(skipna=False)

        # after the shuffle parr[-2] is the period for 2001-01-03
        parr[-2] = pd.NaT

        # re-box so the check does not depend on obj aliasing parr
        obj = box(parr)
        with pytest.raises(TypeError, match="ambiguous"):
            obj.mean()
        with pytest.raises(TypeError, match="ambiguous"):
            obj.mean(skipna=True)
        with pytest.raises(TypeError, match="ambiguous"):
            obj.mean(skipna=False)

    @pytest.mark.parametrize('box', [Series, pd.Index, TimedeltaArray])
    def test_td64_mean(self, box):
        tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4],
                                unit='D')

        tdarr = tdi._data
        obj = box(tdarr)

        result = obj.mean()
        expected = np.array(tdarr).mean()
        assert result == expected

        tdarr[0] = pd.NaT
        # re-box so the check does not depend on obj aliasing tdarr
        obj = box(tdarr)
        assert obj.mean(skipna=False) is pd.NaT

        result2 = obj.mean(skipna=True)
        assert result2 == tdi[1:].mean()

        # The dropped element was zero, so the sum is unchanged and the
        # mean over ten values is 11/10 of the mean over eleven.
        # exact equality fails by 1 nanosecond
        assert result2.round('us') == (result * 11. / 10).round('us')


class TestSeriesStatReductions:
# Note: the name TestSeriesStatReductions indicates these tests
# were moved from a series-specific test file, _not_ that these tests are
Expand Down

0 comments on commit efc7f2f

Please sign in to comment.