Skip to content

Commit

Permalink
BUG: rolling.quantile does not return an interpolated result (#16247)
Browse files Browse the repository at this point in the history
  • Loading branch information
guillemborrell authored and jreback committed Jul 10, 2017
1 parent a5477b7 commit a43c157
Show file tree
Hide file tree
Showing 5 changed files with 249 additions and 8 deletions.
185 changes: 185 additions & 0 deletions asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
from .pandas_vb_common import *
import pandas as pd
import numpy as np


class DataframeRolling(object):
goal_time = 0.2

def setup(self):
self.N = 100000
self.Ns = 10000
self.df = pd.DataFrame({'a': np.random.random(self.N)})
self.dfs = pd.DataFrame({'a': np.random.random(self.Ns)})
self.wins = 10
self.winl = 1000

def time_rolling_quantile_0(self):
(self.df.rolling(self.wins).quantile(0.0))

def time_rolling_quantile_1(self):
(self.df.rolling(self.wins).quantile(1.0))

def time_rolling_quantile_median(self):
(self.df.rolling(self.wins).quantile(0.5))

def time_rolling_median(self):
(self.df.rolling(self.wins).median())

def time_rolling_median(self):
(self.df.rolling(self.wins).mean())

def time_rolling_max(self):
(self.df.rolling(self.wins).max())

def time_rolling_min(self):
(self.df.rolling(self.wins).min())

def time_rolling_std(self):
(self.df.rolling(self.wins).std())

def time_rolling_count(self):
(self.df.rolling(self.wins).count())

def time_rolling_skew(self):
(self.df.rolling(self.wins).skew())

def time_rolling_kurt(self):
(self.df.rolling(self.wins).kurt())

def time_rolling_sum(self):
(self.df.rolling(self.wins).sum())

def time_rolling_corr(self):
(self.dfs.rolling(self.wins).corr())

def time_rolling_cov(self):
(self.dfs.rolling(self.wins).cov())

def time_rolling_quantile_0_l(self):
(self.df.rolling(self.winl).quantile(0.0))

def time_rolling_quantile_1_l(self):
(self.df.rolling(self.winl).quantile(1.0))

def time_rolling_quantile_median_l(self):
(self.df.rolling(self.winl).quantile(0.5))

def time_rolling_median_l(self):
(self.df.rolling(self.winl).median())

def time_rolling_median_l(self):
(self.df.rolling(self.winl).mean())

def time_rolling_max_l(self):
(self.df.rolling(self.winl).max())

def time_rolling_min_l(self):
(self.df.rolling(self.winl).min())

def time_rolling_std_l(self):
(self.df.rolling(self.wins).std())

def time_rolling_count_l(self):
(self.df.rolling(self.wins).count())

def time_rolling_skew_l(self):
(self.df.rolling(self.wins).skew())

def time_rolling_kurt_l(self):
(self.df.rolling(self.wins).kurt())

def time_rolling_sum_l(self):
(self.df.rolling(self.wins).sum())


class SeriesRolling(object):
goal_time = 0.2

def setup(self):
self.N = 100000
self.Ns = 10000
self.df = pd.DataFrame({'a': np.random.random(self.N)})
self.dfs = pd.DataFrame({'a': np.random.random(self.Ns)})
self.sr = self.df.a
self.srs = self.dfs.a
self.wins = 10
self.winl = 1000

def time_rolling_quantile_0(self):
(self.sr.rolling(self.wins).quantile(0.0))

def time_rolling_quantile_1(self):
(self.sr.rolling(self.wins).quantile(1.0))

def time_rolling_quantile_median(self):
(self.sr.rolling(self.wins).quantile(0.5))

def time_rolling_median(self):
(self.sr.rolling(self.wins).median())

def time_rolling_median(self):
(self.sr.rolling(self.wins).mean())

def time_rolling_max(self):
(self.sr.rolling(self.wins).max())

def time_rolling_min(self):
(self.sr.rolling(self.wins).min())

def time_rolling_std(self):
(self.sr.rolling(self.wins).std())

def time_rolling_count(self):
(self.sr.rolling(self.wins).count())

def time_rolling_skew(self):
(self.sr.rolling(self.wins).skew())

def time_rolling_kurt(self):
(self.sr.rolling(self.wins).kurt())

def time_rolling_sum(self):
(self.sr.rolling(self.wins).sum())

def time_rolling_corr(self):
(self.srs.rolling(self.wins).corr())

def time_rolling_cov(self):
(self.srs.rolling(self.wins).cov())

def time_rolling_quantile_0_l(self):
(self.sr.rolling(self.winl).quantile(0.0))

def time_rolling_quantile_1_l(self):
(self.sr.rolling(self.winl).quantile(1.0))

def time_rolling_quantile_median_l(self):
(self.sr.rolling(self.winl).quantile(0.5))

def time_rolling_median_l(self):
(self.sr.rolling(self.winl).median())

def time_rolling_median_l(self):
(self.sr.rolling(self.winl).mean())

def time_rolling_max_l(self):
(self.sr.rolling(self.winl).max())

def time_rolling_min_l(self):
(self.sr.rolling(self.winl).min())

def time_rolling_std_l(self):
(self.sr.rolling(self.wins).std())

def time_rolling_count_l(self):
(self.sr.rolling(self.wins).count())

def time_rolling_skew_l(self):
(self.sr.rolling(self.wins).skew())

def time_rolling_kurt_l(self):
(self.sr.rolling(self.wins).kurt())

def time_rolling_sum_l(self):
(self.sr.rolling(self.wins).sum())
5 changes: 4 additions & 1 deletion doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,11 @@ Plotting

Groupby/Resample/Rolling
^^^^^^^^^^^^^^^^^^^^^^^^
- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)

- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`)
- Bug in ``.rolling.quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`)


Sparse
^^^^^^
Expand All @@ -191,6 +193,7 @@ Categorical
^^^^^^^^^^^



Other
^^^^^
- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)
15 changes: 13 additions & 2 deletions pandas/_libs/window.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1348,8 +1348,9 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
bint is_variable
ndarray[int64_t] start, end
ndarray[double_t] output
double vlow, vhigh

if quantile < 0.0 or quantile > 1.0:
if quantile <= 0.0 or quantile >= 1.0:
raise ValueError("quantile value {0} not in [0, 1]".format(quantile))

# we use the Fixed/Variable Indexer here as the
Expand Down Expand Up @@ -1391,7 +1392,17 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,

if nobs >= minp:
idx = int(quantile * <double>(nobs - 1))
output[i] = skiplist.get(idx)

# Single value in skip list
if nobs == 1:
output[i] = skiplist.get(0)

# Interpolated quantile
else:
vlow = skiplist.get(idx)
vhigh = skiplist.get(idx + 1)
output[i] = (vlow + (vhigh - vlow) *
(quantile * (nobs - 1) - idx))
else:
output[i] = NaN

Expand Down
11 changes: 9 additions & 2 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,8 +975,15 @@ def quantile(self, quantile, **kwargs):

def f(arg, *args, **kwargs):
minp = _use_window(self.min_periods, window)
return _window.roll_quantile(arg, window, minp, indexi,
self.closed, quantile)
if quantile == 1.0:
return _window.roll_max(arg, window, minp, indexi,
self.closed)
elif quantile == 0.0:
return _window.roll_min(arg, window, minp, indexi,
self.closed)
else:
return _window.roll_quantile(arg, window, minp, indexi,
self.closed, quantile)

return self._apply(f, 'quantile', quantile=quantile,
**kwargs)
Expand Down
41 changes: 38 additions & 3 deletions pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -1122,8 +1122,19 @@ def test_rolling_quantile(self):
def scoreatpercentile(a, per):
values = np.sort(a, axis=0)

idx = per / 1. * (values.shape[0] - 1)
return values[int(idx)]
idx = int(per / 1. * (values.shape[0] - 1))

if idx == values.shape[0] - 1:
retval = values[-1]

else:
qlow = float(idx) / float(values.shape[0] - 1)
qhig = float(idx + 1) / float(values.shape[0] - 1)
vlow = values[idx]
vhig = values[idx + 1]
retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow)

return retval

for q in qs:

Expand All @@ -1138,6 +1149,30 @@ def alt(x):

self._check_moment_func(f, alt, name='quantile', quantile=q)

def test_rolling_quantile_np_percentile(self):
# #9413: Tests that rolling window's quantile default behavior
# is analogus to Numpy's percentile
row = 10
col = 5
idx = pd.date_range(20100101, periods=row, freq='B')
df = pd.DataFrame(np.random.rand(row * col).reshape((row, -1)),
index=idx)

df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0)
np_percentile = np.percentile(df, [25, 50, 75], axis=0)

tm.assert_almost_equal(df_quantile.values, np.array(np_percentile))

def test_rolling_quantile_series(self):
# #16211: Tests that rolling window's quantile default behavior
# is analogus to pd.Series' quantile
arr = np.arange(100)
s = pd.Series(arr)
q1 = s.quantile(0.1)
q2 = s.rolling(100).quantile(0.1).iloc[-1]

tm.assert_almost_equal(q1, q2)

def test_rolling_quantile_param(self):
ser = Series([0.0, .1, .5, .9, 1.0])

Expand Down Expand Up @@ -3558,7 +3593,7 @@ def test_ragged_quantile(self):

result = df.rolling(window='2s', min_periods=1).quantile(0.5)
expected = df.copy()
expected['B'] = [0.0, 1, 1.0, 3.0, 3.0]
expected['B'] = [0.0, 1, 1.5, 3.0, 3.5]
tm.assert_frame_equal(result, expected)

def test_ragged_std(self):
Expand Down

0 comments on commit a43c157

Please sign in to comment.