Skip to content

Commit

Permalink
ENH: Option to return -inf/inf as lower/upper bound of quantiles, see p…
Browse files Browse the repository at this point in the history
  • Loading branch information
dberenbaum committed Jul 28, 2018
1 parent 114f415 commit b868335
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 3 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ New features

- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)

- :func:`quantile` and :func:`qcut` now accept ``bounded`` as a keyword
  argument; passing ``bounded=False`` returns unbounded quantiles, where the lower/upper bounds are ``-inf``/``inf`` instead of the min/max of the data (:issue:`17282`)

.. _whatsnew_0240.enhancements.extension_array_operators:

``ExtensionArray`` operator support
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,7 +985,7 @@ def _broadcast(arr_or_scalar, shape):
}


def quantile(x, q, interpolation_method='fraction'):
def quantile(x, q, bounded=True, interpolation_method='fraction'):
"""
Compute sample quantile or quantiles of the input array. For example, q=0.5
computes the median.
Expand All @@ -1002,6 +1002,9 @@ def quantile(x, q, interpolation_method='fraction'):
Values from which to extract score.
q : scalar or array
Percentile at which to extract score.
bounded : bool, optional
If True (the default), use the min/max of the distribution as the
lower/upper bounds. If False, return -inf for q == 0 and inf for q == 1.
interpolation_method : {'fraction', 'lower', 'higher'}, optional
This optional parameter specifies the interpolation method to use,
when the desired quantile lies between two data points `i` and `j`:
Expand Down Expand Up @@ -1038,6 +1041,12 @@ def _interpolate(a, b, fraction):
return a + (b - a) * fraction

def _get_score(at):
if not bounded:
if at == 0:
return -np.inf
elif at == 1:
return np.inf

if len(values) == 0:
return np.nan

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
series_index, name, dtype)


def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
def qcut(x, q, labels=None, retbins=False, precision=3, bounded=True,
duplicates='raise'):
"""
Quantile-based discretization function. Discretize variable into
equal-sized buckets based on rank or based on sample quantiles. For example
Expand All @@ -260,6 +261,9 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
is given as a scalar.
precision : int, optional
The precision at which to store and display the bins labels
bounded : bool, optional
If True (the default), use the min/max of the distribution as the
lower/upper bin bounds. If False, use -inf/inf as the outermost bounds.
duplicates : {default 'raise', 'drop'}, optional
If bin edges are not unique, raise ValueError or drop non-uniques.
Expand Down Expand Up @@ -301,7 +305,7 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
quantiles = np.linspace(0, 1, q + 1)
else:
quantiles = q
bins = algos.quantile(x, quantiles)
bins = algos.quantile(x, quantiles, bounded=bounded)
fac, bins = _bins_to_cuts(x, bins, labels=labels,
precision=precision, include_lowest=True,
dtype=dtype, duplicates=duplicates)
Expand Down

0 comments on commit b868335

Please sign in to comment.