Skip to content

Commit

Permalink
ENH: Allow adjust=False when times is provided (#59142)
Browse files Browse the repository at this point in the history
* add adjust parameter to the ewma variable times test. Add tests for disallowed decay-specification parameters when times is specified and adjust=False

* allow adjust=False when times is provided

* re-calculate alpha each iteration for irregular-spaced time series

* whatsnew entry for allowing adjust=False with times

* pre-commit style fixes

* reduce line lengths to comply with pre-commit

* reduce line lengths and apply ruff-reformat changes
  • Loading branch information
tserrao authored Jul 8, 2024
1 parent 6090042 commit bd405e8
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 10 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ Other enhancements
- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`)
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
- :meth:`DataFrame.ewm` and :meth:`Series.ewm` now allow ``adjust=False`` when ``times`` is provided (:issue:`54328`)
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
- :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1813,6 +1813,9 @@ def ewm(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
if normalize:
# avoid numerical errors on constant series
if weighted != cur:
if not adjust and com == 1:
# update in case of irregular-interval series
new_wt = 1. - old_wt
weighted = old_wt * weighted + new_wt * cur
weighted /= (old_wt + new_wt)
if adjust:
Expand Down
11 changes: 8 additions & 3 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,10 @@ class ExponentialMovingWindow(BaseWindow):
Provide exponentially weighted (EW) calculations.
Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be
provided if ``times`` is not provided. If ``times`` is provided,
provided if ``times`` is not provided. If ``times`` is provided and ``adjust=True``,
``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided.
If ``times`` is provided and ``adjust=False``, ``halflife`` must be the only
provided decay-specification parameter.
Parameters
----------
Expand Down Expand Up @@ -358,8 +360,6 @@ def __init__(
self.ignore_na = ignore_na
self.times = times
if self.times is not None:
if not self.adjust:
raise NotImplementedError("times is not supported with adjust=False.")
times_dtype = getattr(self.times, "dtype", None)
if not (
is_datetime64_dtype(times_dtype)
Expand All @@ -376,6 +376,11 @@ def __init__(
# Halflife is no longer applicable when calculating COM
# But allow COM to still be calculated if the user passes other decay args
if common.count_not_none(self.com, self.span, self.alpha) > 0:
if not self.adjust:
raise NotImplementedError(
"None of com, span, or alpha can be specified if "
"times is provided and adjust=False"
)
self._com = get_center_of_mass(self.com, self.span, None, self.alpha)
else:
self._com = 1.0
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/window/numba_.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ def ewm(
# note that len(deltas) = len(vals) - 1 and deltas[i]
# is to be used in conjunction with vals[i+1]
old_wt *= old_wt_factor ** deltas[start + j - 1]
if not adjust and com == 1:
# update in case of irregular-interval time series
new_wt = 1.0 - old_wt
else:
weighted = old_wt_factor * weighted
if is_observation:
Expand Down Expand Up @@ -324,6 +327,9 @@ def ewm_table(
# note that len(deltas) = len(vals) - 1 and deltas[i]
# is to be used in conjunction with vals[i+1]
old_wt[j] *= old_wt_factor ** deltas[i - 1]
if not adjust and com == 1:
# update in case of irregular-interval time series
new_wt = 1.0 - old_wt[j]
else:
weighted[j] = old_wt_factor * weighted[j]
if is_observations[j]:
Expand Down
61 changes: 54 additions & 7 deletions pandas/tests/window/test_ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
tm.assert_frame_equal(result, expected)


def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit):
def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit, adjust):
# GH 54328
tz = tz_aware_fixture
halflife = "23 days"
times = (
Expand All @@ -112,8 +113,11 @@ def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit):
)
data = np.arange(3)
df = DataFrame(data)
result = df.ewm(halflife=halflife, times=times).mean()
expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459])
result = df.ewm(halflife=halflife, times=times, adjust=adjust).mean()
if adjust:
expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459])
else:
expected = DataFrame([0.0, 0.23762518642226227, 1.534926369128742])
tm.assert_frame_equal(result, expected)


Expand Down Expand Up @@ -148,13 +152,56 @@ def test_ewm_getitem_attributes_retained(arg, adjust, ignore_na):
assert result == expected


def test_ewma_times_adjust_false_raises():
# GH 40098
def test_ewma_times_adjust_false_with_disallowed_com():
    # GH 54328
    # With ``times`` and ``adjust=False`` only ``halflife`` may describe the
    # decay; supplying ``com`` (first positional argument of ``ewm``) as well
    # must be rejected.
    ser = Series(range(1))
    times = date_range("2000", freq="D", periods=1)
    msg = (
        "None of com, span, or alpha can be specified "
        "if times is provided and adjust=False"
    )
    with pytest.raises(NotImplementedError, match=msg):
        ser.ewm(0.1, adjust=False, times=times, halflife="1D")


def test_ewma_times_adjust_false_with_disallowed_alpha():
    # GH 54328
    # With ``times`` and ``adjust=False`` only ``halflife`` may describe the
    # decay. ``alpha`` is the *only* extra decay argument passed here (no
    # positional ``com``), so the error can only come from ``alpha`` itself.
    with pytest.raises(
        NotImplementedError,
        match=(
            "None of com, span, or alpha can be specified "
            "if times is provided and adjust=False"
        ),
    ):
        Series(range(1)).ewm(
            adjust=False,
            times=date_range("2000", freq="D", periods=1),
            alpha=0.5,
            halflife="1D",
        )


def test_ewma_times_adjust_false_with_disallowed_span():
    # GH 54328
    # With ``times`` and ``adjust=False`` only ``halflife`` may describe the
    # decay. ``span`` is the *only* extra decay argument passed here (no
    # positional ``com``), so the error can only come from ``span`` itself.
    with pytest.raises(
        NotImplementedError,
        match=(
            "None of com, span, or alpha can be specified "
            "if times is provided and adjust=False"
        ),
    ):
        Series(range(1)).ewm(
            adjust=False,
            times=date_range("2000", freq="D", periods=1),
            span=10,
            halflife="1D",
        )


Expand Down

0 comments on commit bd405e8

Please sign in to comment.