From 1890a88d4d1b2926e45f631330f6191642bef773 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 15 Dec 2015 07:02:58 -0500 Subject: [PATCH] cleanup based on comments --- doc/source/computation.rst | 35 ++-- doc/source/whatsnew/v0.14.0.txt | 15 +- doc/source/whatsnew/v0.15.0.txt | 103 ++++++++--- doc/source/whatsnew/v0.18.0.txt | 12 +- pandas/core/base.py | 15 +- pandas/core/groupby.py | 31 ++-- pandas/core/window.py | 305 ++++++++++++++++---------------- pandas/tests/test_window.py | 74 ++++++-- 8 files changed, 355 insertions(+), 235 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index a96c14a6e154f..39587e82731b0 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -243,6 +243,7 @@ accept the following arguments: - ``window``: size of moving window - ``min_periods``: threshold of non-null data points to require (otherwise result is NA) +- ``center``: boolean, whether to set the labels at the center (default is False) .. warning:: @@ -334,7 +335,7 @@ The following methods are available: :meth:`~Window.sum`, Sum of values :meth:`~Window.mean`, Mean of values -The weights used in the window are specified by the ``win_type``keyword. The list of recognized types are: +The weights used in the window are specified by the ``win_type`` keyword. The list of recognized types are: - ``boxcar`` - ``triang`` @@ -370,27 +371,12 @@ For some windowing functions, additional parameters must be specified: ser.rolling(window=5, win_type='gaussian').mean(std=0.1) -Centering Windows -~~~~~~~~~~~~~~~~~ - -By default the labels are set to the right edge of the window, but a -``center`` keyword is available so the labels can be set at the center. -This keyword is available in other rolling functions as well. - -.. ipython:: python - - ser.rolling(window=5, win_type='boxcar').mean() - - ser.rolling(window=5, win_type='boxcar', center=True).mean() - - ser.rolling(window=5, center=True).mean() - .. _stats.moments.normalization: .. 
note:: For ``.sum()`` with a ``win_type``, there is no normalization done to the - weights. Passing custom weights of ``[1, 1, 1]`` will yield a different + weights for the window. Passing custom weights of ``[1, 1, 1]`` will yield a different result than passing weights of ``[2, 2, 2]``, for example. When passing a ``win_type`` instead of explicitly specifying the weights, the weights are already normalized so that the largest weight is 1. @@ -399,6 +385,18 @@ This keyword is available in other rolling functions as well. such that the weights are normalized with respect to each other. Weights of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result. +Centering Windows +~~~~~~~~~~~~~~~~~ + +By default the labels are set to the right edge of the window, but a +``center`` keyword is available so the labels can be set at the center. +This keyword is available in other rolling functions as well. + +.. ipython:: python + + ser.rolling(window=5).mean() + ser.rolling(window=5, center=True).mean() + .. _stats.moments.binary: Binary Window Functions @@ -550,7 +548,7 @@ Furthermore you can pass a nested dict to indicate different aggregations on dif .. ipython:: python - r.agg({'A' : {'ra' : 'sum'}, 'B' : {'rb' : 'std' }}) + r.agg({'A' : ['sum','std'], 'B' : ['mean','std'] }) .. _stats.moments.expanding: @@ -607,6 +605,7 @@ all accept are: - ``min_periods``: threshold of non-null data points to require. Defaults to minimum needed to compute statistic. No ``NaNs`` will be output once ``min_periods`` non-null data points have been seen. +- ``center``: boolean, whether to set the labels at the center (default is False) .. note:: diff --git a/doc/source/whatsnew/v0.14.0.txt b/doc/source/whatsnew/v0.14.0.txt index e2f96f204edab..67928af30bead 100644 --- a/doc/source/whatsnew/v0.14.0.txt +++ b/doc/source/whatsnew/v0.14.0.txt @@ -170,11 +170,18 @@ API changes :ref:`Computing rolling pairwise covariances and correlations ` in the docs. - .. ipython:: python + .. 
code-block:: python + + In [1]: df = DataFrame(np.random.randn(10,4),columns=list('ABCD')) + + In [4]: covs = pd.rolling_cov(df[['A','B','C']], df[['B','C','D']], 5, pairwise=True) - df = DataFrame(np.random.randn(10,4),columns=list('ABCD')) - covs = rolling_cov(df[['A','B','C']], df[['B','C','D']], 5, pairwise=True) - covs[df.index[-1]] + In [5]: covs[df.index[-1]] + Out[5]: + B C D + A 0.035310 0.326593 -0.505430 + B 0.137748 -0.006888 -0.005383 + C -0.006888 0.861040 0.020762 - ``Series.iteritems()`` is now lazy (returns an iterator rather than a list). This was the documented behavior prior to 0.14. (:issue:`6760`) diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt index a33e0f19961ab..9651c1efeff4a 100644 --- a/doc/source/whatsnew/v0.15.0.txt +++ b/doc/source/whatsnew/v0.15.0.txt @@ -68,7 +68,7 @@ For full docs, see the :ref:`categorical introduction ` and the .. ipython:: python :okwarning: - + df = DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']}) df["grade"] = df["raw_grade"].astype("category") @@ -353,9 +353,15 @@ Rolling/Expanding Moments improvements New behavior - .. ipython:: python + .. code-block:: python - rolling_min(s, window=10, min_periods=5) + In [4]: pd.rolling_min(s, window=10, min_periods=5) + Out[4]: + 0 NaN + 1 NaN + 2 NaN + 3 NaN + dtype: float64 - :func:`rolling_max`, :func:`rolling_min`, :func:`rolling_sum`, :func:`rolling_mean`, :func:`rolling_median`, :func:`rolling_std`, :func:`rolling_var`, :func:`rolling_skew`, :func:`rolling_kurt`, :func:`rolling_quantile`, @@ -381,9 +387,15 @@ Rolling/Expanding Moments improvements New behavior (note final value is ``5 = sum([2, 3, NaN])``): - .. ipython:: python + .. 
code-block:: python - rolling_sum(Series(range(4)), window=3, min_periods=0, center=True) + In [7]: rolling_sum(Series(range(4)), window=3, min_periods=0, center=True) + Out[7]: + 0 1 + 1 3 + 2 6 + 3 5 + dtype: float64 - :func:`rolling_window` now normalizes the weights properly in rolling mean mode (`mean=True`) so that the calculated weighted means (e.g. 'triang', 'gaussian') are distributed about the same means as those @@ -397,20 +409,27 @@ Rolling/Expanding Moments improvements .. code-block:: python - In [39]: rolling_window(s, window=3, win_type='triang', center=True) - Out[39]: - 0 NaN - 1 6.583333 - 2 6.883333 - 3 6.683333 - 4 NaN - dtype: float64 + In [39]: rolling_window(s, window=3, win_type='triang', center=True) + Out[39]: + 0 NaN + 1 6.583333 + 2 6.883333 + 3 6.683333 + 4 NaN + dtype: float64 New behavior .. ipython:: python - rolling_window(s, window=3, win_type='triang', center=True) + In [10]: pd.rolling_window(s, window=3, win_type='triang', center=True) + Out[10]: + 0 NaN + 1 9.875 + 2 10.325 + 3 10.025 + 4 NaN + dtype: float64 - Removed ``center`` argument from all :func:`expanding_ ` functions (see :ref:`list `), as the results produced when ``center=True`` did not make much sense. (:issue:`7925`) @@ -449,9 +468,17 @@ Rolling/Expanding Moments improvements New behavior (note values start at index ``4``, the location of the 2nd (since ``min_periods=2``) non-empty value): - .. ipython:: python + .. code-block:: python - ewma(s, com=3., min_periods=2) + In [2]: pd.ewma(s, com=3., min_periods=2) + Out[2]: + 0 NaN + 1 NaN + 2 NaN + 3 NaN + 4 1.759644 + 5 2.383784 + dtype: float64 - :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` now have an optional ``adjust`` argument, just like :func:`ewma` does, @@ -465,11 +492,28 @@ Rolling/Expanding Moments improvements When ``ignore_na=True`` (which reproduces the pre-0.15.0 behavior), missing values are ignored in the weights calculation. (:issue:`7543`) - .. 
ipython:: python + .. code-block:: python + + In [7]: pd.ewma(Series([None, 1., 8.]), com=2.) + Out[7]: + 0 NaN + 1 1.0 + 2 5.2 + dtype: float64 + + In [8]: pd.ewma(Series([1., None, 8.]), com=2., ignore_na=True) # pre-0.15.0 behavior + Out[8]: + 0 1.0 + 1 1.0 + 2 5.2 + dtype: float64 - ewma(Series([None, 1., 8.]), com=2.) - ewma(Series([1., None, 8.]), com=2., ignore_na=True) # pre-0.15.0 behavior - ewma(Series([1., None, 8.]), com=2., ignore_na=False) # new default + In [9]: pd.ewma(Series([1., None, 8.]), com=2., ignore_na=False) # new default + Out[9]: + 0 1.000000 + 1 1.000000 + 2 5.846154 + dtype: float64 .. warning:: @@ -525,10 +569,23 @@ Rolling/Expanding Moments improvements By comparison, the following 0.15.0 results have a ``NaN`` for entry ``0``, and the debiasing factors are decreasing (towards 1.25): - .. ipython:: python + .. code-block:: python - ewmvar(s, com=2., bias=False) - ewmvar(s, com=2., bias=False) / ewmvar(s, com=2., bias=True) + In [14]: pd.ewmvar(s, com=2., bias=False) + Out[14]: + 0 NaN + 1 0.500000 + 2 1.210526 + 3 4.089069 + dtype: float64 + + In [15]: pd.ewmvar(s, com=2., bias=False) / pd.ewmvar(s, com=2., bias=True) + Out[15]: + 0 NaN + 1 2.083333 + 2 1.583333 + 3 1.425439 + dtype: float64 See :ref:`Exponentially weighted moment functions ` for details. (:issue:`7912`) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 96618ffbc36cb..7f63096d7c045 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -86,8 +86,8 @@ And multiple aggregations .. ipython:: python - r.agg({'A' : {'ra' : ['mean','std']}, - 'B' : {'rb' : ['mean','std']}}) + r.agg({'A' : ['mean','std'], + 'B' : ['mean','std']}) .. _whatsnew_0180.enhancements.other: @@ -239,15 +239,17 @@ Deprecations 2 0.5 dtype: float64 -- The the ``freq`` and ``how`` arguments to the ``.rolling``, ``.expanding``, and ``.ewm`` (new) functions are deprecated, and will be removed in a future version. 
(:issue:`11603`) +- The ``freq`` and ``how`` arguments to the ``.rolling``, ``.expanding``, and ``.ewm`` (new) functions are deprecated, and will be removed in a future version. You can simply resample the input prior to creating a window function. (:issue:`11603`). + + For example, instead of ``s.rolling(window=5,freq='D').max()`` to get the max value on a rolling 5 day window, one could use ``s.resample('D',how='max').rolling(window=5).max()``, which first resamples the data to daily data, then provides a rolling 5 day window. .. _whatsnew_0180.prior_deprecations: Removal of prior version deprecations/changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- Removal of ``rolling_corr_parwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`) -- Removal of ``expanding_corr_parwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`) +- Removal of ``rolling_corr_pairwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`) +- Removal of ``expanding_corr_pairwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`) diff --git a/pandas/core/base.py b/pandas/core/base.py index 42e04d5a03696..a1e1c20344ea4 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -358,11 +358,10 @@ def _gotitem(self, key, ndim, subset=None): """ _see_also_template = """ - See also -------- -`pandas.Series.%(name)s` -`pandas.DataFrame.%(name)s` +pandas.Series.%(name)s +pandas.DataFrame.%(name)s """ def aggregate(self, func, *args, **kwargs): @@ -422,7 +421,7 @@ def _aggregate(self, arg, *args, **kwargs): else: for col, agg_how in compat.iteritems(arg): colg = self._gotitem(col, ndim=1) - result[col] = colg.aggregate(agg_how, _level=(_level or 0) + 1) + result[col] = colg.aggregate(agg_how, _level=None) keys.append(col) if isinstance(list(result.values())[0], com.ABCDataFrame): @@ -451,12 +450,16 @@ def _aggregate_multiple_funcs(self, arg, _level): if self.axis != 0: raise NotImplementedError("axis other than 0 is not 
supported") - obj = self._obj_with_exclusions + if self._selected_obj.ndim == 1: + obj = self._selected_obj + else: + obj = self._obj_with_exclusions + results = [] keys = [] # degenerate case - if obj.ndim == 1: + if obj.ndim==1: for a in arg: try: colg = self._gotitem(obj.name, ndim=1, subset=obj) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ac07c9487fc15..5428ee5484bfa 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -40,15 +40,11 @@ _doc_template = """ - Returns - ------- - same type as input - See also -------- - `pandas.Series.%(name)s` - `pandas.DataFrame.%(name)s` - `pandas.Panel.%(name)s` + pandas.Series.%(name)s + pandas.DataFrame.%(name)s + pandas.Panel.%(name)s """ # special case to prevent duplicate plots when catching exceptions when @@ -628,7 +624,6 @@ def __iter__(self): return self.grouper.get_iterator(self.obj, axis=self.axis) @Substitution(name='groupby') - @Appender(_doc_template) def apply(self, func, *args, **kwargs): """ Apply function and combine results together in an intelligent way. The @@ -664,10 +659,12 @@ def apply(self, func, *args, **kwargs): side-effects, as they will take effect twice for the first group. - See also -------- - aggregate, transform""" + aggregate, transform + pandas.Series.%(name)s + pandas.DataFrame.%(name)s + pandas.Panel.%(name)s""" func = self._is_builtin_func(func) @@ -759,7 +756,7 @@ def std(self, ddof=1): Parameters ---------- ddof : integer, default 1 - degrees of freedom + degrees of freedom """ # todo, implement at cython level? 
@@ -776,7 +773,7 @@ def var(self, ddof=1): Parameters ---------- ddof : integer, default 1 - degrees of freedom + degrees of freedom """ if ddof == 1: @@ -797,7 +794,7 @@ def sem(self, ddof=1): Parameters ---------- ddof : integer, default 1 - degrees of freedom + degrees of freedom """ return self.std(ddof=ddof)/np.sqrt(self.count()) @@ -868,8 +865,8 @@ def nth(self, n, dropna=None): 1 4 5 6 >>> g.nth(1, dropna='any') # NaNs denote group exhausted when using dropna - B - A + B + A 1 NaN 5 NaN """ @@ -978,13 +975,13 @@ def cumcount(self, ascending=True): Parameters ---------- ascending : bool, default True - If False, number in reverse, from length of group - 1 to 0. + If False, number in reverse, from length of group - 1 to 0. Examples -------- >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], - ... columns=['A']) + ... columns=['A']) >>> df A 0 a diff --git a/pandas/core/window.py b/pandas/core/window.py index 2e9b4922a039a..4bbdf444ac2a7 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -30,8 +30,8 @@ See also -------- -`pandas.Series.%(name)s` -`pandas.DataFrame.%(name)s` +pandas.Series.%(name)s +pandas.DataFrame.%(name)s """ class _Window(PandasObject, SelectionMixin): @@ -65,8 +65,14 @@ def _setup(self): def _convert_freq(self, how=None): """ resample according to the how, return a new object """ + obj = self._selected_obj if self.freq is not None and isinstance(obj, (com.ABCSeries, com.ABCDataFrame)): + if how is not None: + warnings.warn("The how kw argument is deprecated and removed in a future version. 
You can resample prior " + "to passing to a window function", + FutureWarning, stacklevel=6) + obj = obj.resample(self.freq, how=how) return obj @@ -118,7 +124,7 @@ def _get_window(self, other=None): def __unicode__(self): """ provide a nice str repr of our rolling object """ - attrs = [ "{k}->{v}".format(k=k,v=getattr(self,k)) \ + attrs = [ "{k}={v}".format(k=k,v=getattr(self,k)) \ for k in self._attributes if getattr(self,k,None) is not None ] return "{klass} [{attrs}]".format(klass=self.__class__.__name__, attrs=','.join(attrs)) @@ -227,6 +233,22 @@ def aggregate(self, arg, *args, **kwargs): agg = aggregate + _shared_docs['sum'] = dedent(""" + %(name)s sum + + Parameters + ---------- + how : string, default None (DEPRECATED) + Method for down- or re-sampling""") + + _shared_docs['mean'] = dedent(""" + %(name)s mean + + Parameters + ---------- + how : string, default None (DEPRECATED) + Method for down- or re-sampling""") + class Window(_Window): """ Provides rolling transformations. @@ -241,7 +263,7 @@ class Window(_Window): min_periods : int, default None Minimum number of observations in window required to have a value (otherwise result is NA). - freq : string or DateOffset object, optional (default None) + freq : string or DateOffset object, optional (default None) (DEPRECATED) Frequency to conform the data to before computing the statistic. Specified as a frequency string or DateOffset object. 
center : boolean, default False @@ -345,8 +367,8 @@ def f(arg, *args, **kwargs): return self._wrap_results(results, blocks, obj) @Substitution(name='rolling') - @Appender(SelectionMixin._agg_doc) @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): result, how = self._aggregate(arg, *args, **kwargs) if result is None: @@ -358,13 +380,15 @@ def aggregate(self, arg, *args, **kwargs): agg = aggregate - @Substitution(name='rolling') + @Substitution(name='window') @Appender(_doc_template) + @Appender(_shared_docs['sum']) def sum(self, **kwargs): return self._apply_window(mean=False, **kwargs) - @Substitution(name='rolling') + @Substitution(name='window') @Appender(_doc_template) + @Appender(_shared_docs['mean']) def mean(self, **kwargs): return self._apply_window(mean=True, **kwargs) @@ -471,8 +495,8 @@ def count(self): Parameters ---------- func : function - Must produce a single value from an ndarray input - *args and **kwargs are passed to the function""") + Must produce a single value from an ndarray input + *args and **kwargs are passed to the function""") def apply(self, func, args=(), kwargs={}): _level = kwargs.pop('_level',None) @@ -484,9 +508,8 @@ def f(arg, window, min_periods): return self._apply(f, center=False) - _shared_docs['sum'] = """%(name)s sum""" - def sum(self): - return self._apply('roll_sum') + def sum(self, **kwargs): + return self._apply('roll_sum', **kwargs) _shared_docs['max'] = dedent(""" %(name)s maximum @@ -494,15 +517,11 @@ def sum(self): Parameters ---------- how : string, default 'max' (DEPRECATED) - Method for down- or re-sampling""") - def max(self, how=None): - if how is not None: - warnings.warn("The how kw argument is deprecated and removed in a future version. 
You can resample prior " - "to passing to a window function", - FutureWarning, stacklevel=3) - else: + Method for down- or re-sampling""") + def max(self, how=None, **kwargs): + if self.freq is not None and how is None: how = 'max' - return self._apply('roll_max', how=how) + return self._apply('roll_max', how=how, **kwargs) _shared_docs['min'] = dedent(""" %(name)s minimum @@ -510,19 +529,14 @@ def max(self, how=None): Parameters ---------- how : string, default 'min' (DEPRECATED) - Method for down- or re-sampling""") - def min(self, how=None): - if how is not None: - warnings.warn("The how kw argument is deprecated and removed in a future version. You can resample prior " - "to passing to a window function", - FutureWarning, stacklevel=3) - else: + Method for down- or re-sampling""") + def min(self, how=None, **kwargs): + if self.freq is not None and how is None: how = 'min' - return self._apply('roll_min', how=how) + return self._apply('roll_min', how=how, **kwargs) - _shared_docs['mean'] = """%(name)s mean""" - def mean(self): - return self._apply('roll_mean') + def mean(self, **kwargs): + return self._apply('roll_mean', **kwargs) _shared_docs['median'] = dedent(""" %(name)s median @@ -530,15 +544,11 @@ def mean(self): Parameters ---------- how : string, default 'median' (DEPRECATED) - Method for down- or re-sampling""") - def median(self, how=None): - if how is not None: - warnings.warn("The how kw argument is deprecated and removed in a future version. 
You can resample prior " - "to passing to a window function", - FutureWarning, stacklevel=3) - else: + Method for down- or re-sampling""") + def median(self, how=None, **kwargs): + if self.freq is not None and how is None: how = 'median' - return self._apply('roll_median_c', how=how) + return self._apply('roll_median_c', how=how, **kwargs) _shared_docs['std'] = dedent(""" %(name)s standard deviation @@ -546,16 +556,16 @@ def median(self, how=None): Parameters ---------- ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements.""") + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.""") - def std(self, ddof=1): + def std(self, ddof=1, **kwargs): window = self._get_window() def f(arg, *args, **kwargs): minp = _require_min_periods(1)(self.min_periods, window) return _zsqrt(algos.roll_var(arg, window, minp, ddof)) - return self._apply(f, check_minp=_require_min_periods(1)) + return self._apply(f, check_minp=_require_min_periods(1), **kwargs) _shared_docs['var'] = dedent(""" %(name)s variance @@ -563,23 +573,26 @@ def f(arg, *args, **kwargs): Parameters ---------- ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements.""") + Delta Degrees of Freedom. 
The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.""") - def var(self, ddof=1): + def var(self, ddof=1, **kwargs): return self._apply('roll_var', check_minp=_require_min_periods(1), - ddof=ddof) + ddof=ddof, + **kwargs) _shared_docs['skew'] = """Unbiased %(name)s skewness""" - def skew(self): + def skew(self, **kwargs): return self._apply('roll_skew', - check_minp=_require_min_periods(3)) + check_minp=_require_min_periods(3), + **kwargs) _shared_docs['kurt'] = """Unbiased %(name)s kurtosis""" - def kurt(self): + def kurt(self, **kwargs): return self._apply('roll_kurt', - check_minp=_require_min_periods(4)) + check_minp=_require_min_periods(4), + **kwargs) _shared_docs['quantile'] = dedent(""" %(name)s quantile @@ -587,15 +600,15 @@ def kurt(self): Parameters ---------- quantile : float - 0 <= quantile <= 1""") + 0 <= quantile <= 1""") - def quantile(self, quantile): + def quantile(self, quantile, **kwargs): window = self._get_window() def f(arg, *args, **kwargs): minp = _use_window(self.min_periods, window) return algos.roll_quantile(arg, window, minp, quantile) - return self._apply(f) + return self._apply(f, **kwargs) _shared_docs['cov'] = dedent(""" %(name)s sample covariance @@ -614,7 +627,7 @@ def f(arg, *args, **kwargs): Delta Degrees of Freedom. 
The divisor used in calculations is ``N - ddof``, where ``N`` represents the number of elements.""") - def cov(self, other=None, pairwise=None, ddof=1): + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): if other is None: other = self._selected_obj pairwise = True if pairwise is None else pairwise # only default unset @@ -622,8 +635,8 @@ def cov(self, other=None, pairwise=None, ddof=1): window = self._get_window(other) def _get_cov(X, Y): - mean = lambda x: x.rolling(window, self.min_periods, center=self.center).mean() - count = (X+Y).rolling(window=window, center=self.center).count() + mean = lambda x: x.rolling(window, self.min_periods, center=self.center).mean(**kwargs) + count = (X+Y).rolling(window=window, center=self.center).count(**kwargs) bias_adj = count / (count - ddof) return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise)) @@ -642,7 +655,7 @@ def _get_cov(X, Y): will be a Panel in the case of DataFrame inputs. In the case of missing elements, only complete pairwise observations will be used.""") - def corr(self, other=None, pairwise=None): + def corr(self, other=None, pairwise=None, **kwargs): if other is None: other = self._selected_obj pairwise = True if pairwise is None else pairwise # only default unset @@ -659,12 +672,12 @@ def _get_corr(a, b): freq=self.freq, center=self.center) - return a.cov(b) / (a.std() * b.std()) + return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs)) return _flex_binary_moment(self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)) class Rolling(_Rolling_and_Expanding): """ - Provides rolling transformations. + Provides rolling window calculations. .. 
versionadded:: 0.18.0 @@ -698,8 +711,8 @@ class Rolling(_Rolling_and_Expanding): """ @Substitution(name='rolling') - @Appender(SelectionMixin._agg_doc) @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): return super(Rolling, self).aggregate(arg, *args, **kwargs) @@ -720,8 +733,8 @@ def apply(self, func, args=(), kwargs={}): @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['sum']) - def sum(self): - return super(Rolling, self).sum() + def sum(self, **kwargs): + return super(Rolling, self).sum(**kwargs) @Substitution(name='rolling') @Appender(_doc_template) @@ -738,8 +751,8 @@ def min(self, **kwargs): @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['mean']) - def mean(self): - return super(Rolling, self).mean() + def mean(self, **kwargs): + return super(Rolling, self).mean(**kwargs) @Substitution(name='rolling') @Appender(_doc_template) @@ -750,44 +763,44 @@ def median(self, **kwargs): @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['std']) - def std(self, ddof=1): - return super(Rolling, self).std(ddof=ddof) + def std(self, ddof=1, **kwargs): + return super(Rolling, self).std(ddof=ddof, **kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['var']) - def var(self, ddof=1): - return super(Rolling, self).var(ddof=ddof) + def var(self, ddof=1, **kwargs): + return super(Rolling, self).var(ddof=ddof, **kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['skew']) - def skew(self): - return super(Rolling, self).skew() + def skew(self, **kwargs): + return super(Rolling, self).skew(**kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['kurt']) - def kurt(self): - return super(Rolling, self).kurt() + def kurt(self, **kwargs): + return super(Rolling, self).kurt(**kwargs) @Substitution(name='rolling') 
@Appender(_doc_template) @Appender(_shared_docs['quantile']) - def quantile(self, quantile): - return super(Rolling, self).quantile(quantile=quantile) + def quantile(self, quantile, **kwargs): + return super(Rolling, self).quantile(quantile=quantile, **kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['cov']) - def cov(self, other=None, pairwise=None, ddof=1): - return super(Rolling, self).cov(other=other, pairwise=pairwise, ddof=ddof) + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + return super(Rolling, self).cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) @Substitution(name='rolling') @Appender(_doc_template) @Appender(_shared_docs['corr']) - def corr(self, other=None, pairwise=None): - return super(Rolling, self).corr(other=other, pairwise=pairwise) + def corr(self, other=None, pairwise=None, **kwargs): + return super(Rolling, self).corr(other=other, pairwise=pairwise, **kwargs) class Expanding(_Rolling_and_Expanding): """ @@ -837,8 +850,8 @@ def _get_window(self, other=None): return max((len(obj) + len(obj)), self.min_periods) if self.min_periods else (len(obj) + len(obj)) @Substitution(name='expanding') - @Appender(SelectionMixin._agg_doc) @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): return super(Expanding, self).aggregate(arg, *args, **kwargs) @@ -847,8 +860,8 @@ def aggregate(self, arg, *args, **kwargs): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['count']) - def count(self): - return super(Expanding, self).count() + def count(self, **kwargs): + return super(Expanding, self).count(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @@ -859,8 +872,8 @@ def apply(self, func, args=(), kwargs={}): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['sum']) - def sum(self): - return super(Expanding, self).sum() + def sum(self, **kwargs): + return 
super(Expanding, self).sum(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @@ -877,8 +890,8 @@ def min(self, **kwargs): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['mean']) - def mean(self): - return super(Expanding, self).mean() + def mean(self, **kwargs): + return super(Expanding, self).mean(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @@ -889,44 +902,68 @@ def median(self, **kwargs): @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['std']) - def std(self, ddof=1): - return super(Expanding, self).std(ddof=ddof) + def std(self, ddof=1, **kwargs): + return super(Expanding, self).std(ddof=ddof, **kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['var']) - def var(self, ddof=1): - return super(Expanding, self).var(ddof=ddof) + def var(self, ddof=1, **kwargs): + return super(Expanding, self).var(ddof=ddof, **kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['skew']) - def skew(self): - return super(Expanding, self).skew() + def skew(self, **kwargs): + return super(Expanding, self).skew(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['kurt']) - def kurt(self): - return super(Expanding, self).kurt() + def kurt(self, **kwargs): + return super(Expanding, self).kurt(**kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['quantile']) - def quantile(self, quantile): - return super(Expanding, self).quantile(quantile=quantile) + def quantile(self, quantile, **kwargs): + return super(Expanding, self).quantile(quantile=quantile, **kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['cov']) - def cov(self, other=None, pairwise=None, ddof=1): - return super(Expanding, self).cov(other=other, pairwise=pairwise, ddof=ddof) + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + return 
super(Expanding, self).cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) @Substitution(name='expanding') @Appender(_doc_template) @Appender(_shared_docs['corr']) - def corr(self, other=None, pairwise=None): - return super(Expanding, self).corr(other=other, pairwise=pairwise) + def corr(self, other=None, pairwise=None, **kwargs): + return super(Expanding, self).corr(other=other, pairwise=pairwise, **kwargs) + +_bias_template = """ + +Parameters +---------- +bias : boolean, default False + Use a standard estimation bias correction +""" + +_pairwise_template = """ + +Parameters +---------- +other : Series, DataFrame, or ndarray, optional + if not supplied then will default to self and produce pairwise output +pairwise : bool, default None + If False then only matching columns between self and other will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. 
+bias : boolean, default False + Use a standard estimation bias correction +""" class EWM(_Rolling): """ @@ -1012,8 +1049,8 @@ def _constructor(self): return EWM @Substitution(name='ewm') - @Appender(SelectionMixin._agg_doc) @Appender(SelectionMixin._see_also_template) + @Appender(SelectionMixin._agg_doc) def aggregate(self, arg, *args, **kwargs): return super(EWM, self).aggregate(arg, *args, **kwargs) @@ -1062,33 +1099,23 @@ def func(arg): @Substitution(name='ewm') @Appender(_doc_template) - def mean(self): + def mean(self, **kwargs): """exponential weighted moving average""" - return self._apply('ewma') + return self._apply('ewma', **kwargs) @Substitution(name='ewm') @Appender(_doc_template) - def std(self, bias=False): - """exponential weighted moving stddev - - Parameters - ---------- - bias : boolean, default False - Use a standard estimation bias correction - """ - return _zsqrt(self.var(bias=bias)) + @Appender(_bias_template) + def std(self, bias=False, **kwargs): + """exponential weighted moving stddev""" + return _zsqrt(self.var(bias=bias, **kwargs)) vol=std @Substitution(name='ewm') @Appender(_doc_template) - def var(self, bias=False): - """exponential weighted moving average - - Parameters - ---------- - bias : boolean, default False - Use a standard estimation bias correction - """ + @Appender(_bias_template) + def var(self, bias=False, **kwargs): + """exponential weighted moving variance""" def f(arg): return algos.ewmcov(arg, arg, @@ -1098,26 +1125,13 @@ def f(arg): int(self.min_periods), int(bias)) - return self._apply(f) + return self._apply(f, **kwargs) @Substitution(name='ewm') @Appender(_doc_template) - def cov(self, other=None, pairwise=None, bias=False): - """exponential weighted sample covariance - - Parameters - ---------- - other : Series, DataFrame, or ndarray, optional - if not supplied then will default to self and produce pairwise output - pairwise : bool, default None - If False then only matching columns between self and other will be 
used and - the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the output - will be a Panel in the case of DataFrame inputs. In the case of missing - elements, only complete pairwise observations will be used. - bias : boolean, default False - Use a standard estimation bias correction - """ + @Appender(_pairwise_template) + def cov(self, other=None, pairwise=None, bias=False, **kwargs): + """exponential weighted sample covariance""" if other is None: other = self._selected_obj pairwise = True if pairwise is None else pairwise # only default unset @@ -1139,20 +1153,9 @@ def _get_cov(X, Y): @Substitution(name='ewm') @Appender(_doc_template) - def corr(self, other=None, pairwise=None): - """exponential weighted sample correlation - - Parameters - ---------- - other : Series, DataFrame, or ndarray, optional - if not supplied then will default to self and produce pairwise output - pairwise : bool, default None - If False then only matching columns between self and other will be used and - the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the output - will be a Panel in the case of DataFrame inputs. In the case of missing - elements, only complete pairwise observations will be used. 
- """ + @Appender(_pairwise_template) + def corr(self, other=None, pairwise=None, **kwargs): + """exponential weighted sample correlation""" if other is None: other = self._selected_obj pairwise = True if pairwise is None else pairwise # only default unset diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index b81b3a87ab5df..4d7f9292705ad 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -130,17 +130,17 @@ def compare(result, expected): result = r.aggregate({'A': ['mean','std']}) expected = pd.concat([a_mean,a_std],axis=1) - expected.columns = pd.MultiIndex.from_product([['A'],['mean','std']]) + expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','std')]) assert_frame_equal(result, expected) result = r['A'].aggregate(['mean','sum']) expected = pd.concat([a_mean,a_sum],axis=1) - expected.columns = pd.MultiIndex.from_product([['A'],['mean','sum']]) + expected.columns = ['mean','sum'] assert_frame_equal(result, expected) result = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' } }) expected = pd.concat([a_mean,a_sum],axis=1) - expected.columns = pd.MultiIndex.from_product([['A'],['mean','sum']]) + expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','sum')]) compare(result, expected) result = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' }, @@ -150,6 +150,13 @@ def compare(result, expected): ('B','mean2'),('B','sum2')]) compare(result, expected) + result = r.aggregate({'A': ['mean','std'], + 'B': ['mean','std']}) + expected = pd.concat([a_mean,a_std,b_mean,b_std],axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','std'), + ('B','mean'),('B','std')]) + compare(result, expected) + result = r.aggregate({'r1' : { 'A' : ['mean','sum'] }, 'r2' : { 'B' : ['mean','sum'] }}) expected = pd.concat([a_mean,a_sum,b_mean,b_sum],axis=1) @@ -172,10 +179,28 @@ def compare(result, expected): expected = pd.concat([a_sum,rcustom],axis=1) compare(result, expected) + def 
test_agg_consistency(self): + + df = DataFrame({'A' : range(5), + 'B' : range(0,10,2)}) + r = df.rolling(window=3) + + result = r.agg([np.sum, np.mean]).columns + expected = pd.MultiIndex.from_product([list('AB'),['sum','mean']]) + tm.assert_index_equal(result, expected) + + result = r['A'].agg([np.sum, np.mean]).columns + expected = pd.Index(['sum','mean']) + tm.assert_index_equal(result, expected) + + result = r.agg({'A' : [np.sum, np.mean]}).columns + expected = pd.MultiIndex.from_tuples([('A','sum'),('A','mean')]) + tm.assert_index_equal(result, expected) + def test_window_with_args(self): + tm._skip_if_no_scipy() # make sure that we are aggregating window functions correctly with arg - r = Series(np.random.randn(100)).rolling(window=10,min_periods=1,win_type='gaussian') expected = pd.concat([r.mean(std=10),r.mean(std=.01)],axis=1) expected.columns = ['',''] @@ -200,6 +225,31 @@ def test_preserve_metadata(self): self.assertEqual(s2.name, 'foo') self.assertEqual(s3.name, 'foo') + def test_how_compat(self): + # in prior versions, we would allow how to be used in the resample + # now that its deprecated, we need to handle this in the actual + # aggregation functions + s = pd.Series(np.random.randn(20), index=pd.date_range('1/1/2000', periods=20, freq='12H')) + + for how in ['min','max','median']: + for op in ['mean','sum','std','var','kurt','skew']: + for t in ['rolling','expanding']: + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + + dfunc = getattr(pd,"{0}_{1}".format(t,op)) + if dfunc is None: + continue + + if t == 'rolling': + kwargs = {'window' : 5} + else: + kwargs = {} + result = dfunc(s, freq='D', how=how, **kwargs) + + expected = getattr(getattr(s,t)(freq='D', **kwargs),op)(how=how) + assert_series_equal(result, expected) + class TestDeprecations(Base): """ test that we are catching deprecation warnings """ @@ -495,11 +545,12 @@ def test_cmov_window_special_linear_range(self): assert_series_equal(xp, rs) def 
test_rolling_median(self): - self._check_moment_func(mom.rolling_median, np.median, name='median') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self._check_moment_func(mom.rolling_median, np.median, name='median') def test_rolling_min(self): - self._check_moment_func(mom.rolling_min, np.min, name='min') - + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self._check_moment_func(mom.rolling_min, np.min, name='min') with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): a = np.array([1, 2, 3, 4, 5]) @@ -510,7 +561,8 @@ def test_rolling_min(self): np.array([1,2, 3]), window=3, min_periods=5) def test_rolling_max(self): - self._check_moment_func(mom.rolling_max, np.max, name='max') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self._check_moment_func(mom.rolling_max, np.max, name='max') with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): a = np.array([1, 2, 3, 4, 5]) @@ -2177,7 +2229,7 @@ def test_rolling_max_how_resample(self): for i in range(1, 6)]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): x = series.rolling(window=1, freq='D').max(how='mean') - assert_series_equal(expected, x) + assert_series_equal(expected, x) def test_rolling_min_how_resample(self): @@ -2198,7 +2250,7 @@ def test_rolling_min_how_resample(self): for i in range(1, 6)]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): r = series.rolling(window=1, freq='D') - assert_series_equal(expected, r.min()) + assert_series_equal(expected, r.min()) def test_rolling_median_how_resample(self): @@ -2218,7 +2270,7 @@ def test_rolling_median_how_resample(self): for i in range(1, 6)]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): x = series.rolling(window=1, freq='D').median() - assert_series_equal(expected, x) + assert_series_equal(expected, x) def test_rolling_median_memory_error(self): # GH11722