Adds cumulative operators to API (pydata#812)
* Adds nancumsum, nancumprod for numpy compatibility

Needed until numpy v1.12, see
numpy/numpy#7421

* Adds nancumsum, nancumprod to xarray functions
pwolfram authored and shoyer committed Oct 3, 2016
1 parent 573541e commit 9cf107b
Showing 11 changed files with 269 additions and 30 deletions.
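For reference, the NaN handling these shims provide (mirroring the docstring examples added in xarray/core/npcompat.py below): running sums treat NaN as zero and running products treat NaN as one, so the accumulated value simply carries through missing entries. A minimal illustration, assuming the shims are imported from the compat module added in this commit:

>>> import numpy as np
>>> from xarray.core import npcompat
>>> npcompat.nancumsum([1.0, np.nan, 2.0])
array([ 1.,  1.,  3.])
>>> npcompat.nancumprod([2.0, np.nan, 3.0])
array([ 2.,  2.,  6.])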
4 changes: 4 additions & 0 deletions doc/api-hidden.rst
@@ -44,6 +44,8 @@
Dataset.round
Dataset.real
Dataset.T
Dataset.cumsum
Dataset.cumprod

DataArray.ndim
DataArray.shape
@@ -87,6 +89,8 @@
DataArray.round
DataArray.real
DataArray.T
DataArray.cumsum
DataArray.cumprod

ufuncs.angle
ufuncs.arccos
4 changes: 4 additions & 0 deletions doc/api.rst
@@ -145,6 +145,8 @@ Computation
:py:attr:`~Dataset.round`
:py:attr:`~Dataset.real`
:py:attr:`~Dataset.T`
:py:attr:`~Dataset.cumsum`
:py:attr:`~Dataset.cumprod`

**Grouped operations**:
:py:attr:`~core.groupby.DatasetGroupBy.assign`
@@ -286,6 +288,8 @@ Computation
:py:attr:`~DataArray.round`
:py:attr:`~DataArray.real`
:py:attr:`~DataArray.T`
:py:attr:`~DataArray.cumsum`
:py:attr:`~DataArray.cumprod`

**Grouped operations**:
:py:attr:`~core.groupby.DataArrayGroupBy.assign_coords`
4 changes: 4 additions & 0 deletions doc/whats-new.rst
@@ -73,6 +73,10 @@ Enhancements
which to concatenate.
By `Stephan Hoyer <https://github.com/shoyer>`_.

- Adds DataArray and Dataset methods :py:meth:`~xarray.DataArray.cumsum` and
:py:meth:`~xarray.DataArray.cumprod`. By `Phillip J. Wolfram
<https://github.com/pwolfram>`_.

Bug fixes
~~~~~~~~~
- ``groupby_bins`` now restores empty bins by default (:issue:`1019`).
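A usage sketch of the DataArray methods announced in the whats-new entry above (object names and values are illustrative, not part of the commit); with the default skipna=None, missing values are skipped for float data:

>>> import numpy as np
>>> import xarray as xr
>>> da = xr.DataArray([1.0, np.nan, 2.0], dims='x')
>>> da.cumsum('x').values
array([ 1.,  1.,  3.])
>>> da.cumsum('x', skipna=False).values
array([  1.,  nan,  nan])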
14 changes: 14 additions & 0 deletions xarray/core/common.py
@@ -29,6 +29,13 @@ def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
and 'axis' arguments can be supplied. If neither are supplied, then
`{name}` is calculated over axes."""

_cum_extra_args_docstring = \
"""dim : str or sequence of str, optional
Dimension over which to apply `{name}`.
axis : int or sequence of int, optional
Axis over which to apply `{name}`. Only one of the 'dim'
and 'axis' arguments can be supplied."""


class ImplementsDatasetReduce(object):
@classmethod
@@ -51,6 +58,13 @@ def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
Dimension(s) over which to apply `func`. By default `func` is
applied over all dimensions."""

_cum_extra_args_docstring = \
"""dim : str or sequence of str, optional
Dimension over which to apply `{name}`.
axis : int or sequence of int, optional
Axis over which to apply `{name}`. Only one of the 'dim'
and 'axis' arguments can be supplied."""


class ImplementsRollingArrayReduce(object):
@classmethod
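The `{name}` placeholder in the `_cum_extra_args_docstring` templates above is intended to be filled with the concrete method name when the cumsum/cumprod docstrings are generated. An illustrative (not verbatim) use of the template:

# Illustrative only: fill the template for a concrete method name.
from xarray.core.common import ImplementsDatasetReduce

params_doc = ImplementsDatasetReduce._cum_extra_args_docstring.format(name='cumsum')
# params_doc now describes the 'dim' and 'axis' parameters for `cumsum`.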
131 changes: 129 additions & 2 deletions xarray/core/npcompat.py
@@ -5,9 +5,9 @@
import numpy as np

try:
from numpy import broadcast_to, stack, nanprod
from numpy import broadcast_to, stack, nanprod, nancumsum, nancumprod
except ImportError: # pragma: no cover
# these functions should arrive in numpy v1.10
# these functions should arrive in numpy v1.10 to v1.12

def _maybe_view_as_subclass(original_array, new_array):
if type(original_array) is not type(new_array):
@@ -251,3 +251,130 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=0):
"""
a, mask = _replace_nan(a, 1)
return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)


def nancumsum(a, axis=None, dtype=None, out=None):
"""
Return the cumulative sum of array elements over a given axis treating Not a
Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are
encountered and leading NaNs are replaced by zeros.
Zeros are returned for slices that are all-NaN or empty.
.. versionadded:: 1.12.0
Parameters
----------
a : array_like
Input array.
axis : int, optional
Axis along which the cumulative sum is computed. The default
(None) is to compute the cumsum over the flattened array.
dtype : dtype, optional
Type of the returned array and of the accumulator in which the
elements are summed. If `dtype` is not specified, it defaults
to the dtype of `a`, unless `a` has an integer dtype with a
precision less than that of the default platform integer. In
that case, the default platform integer is used.
out : ndarray, optional
Alternative output array in which to place the result. It must
have the same shape and buffer length as the expected output
but the type will be cast if necessary. See `doc.ufuncs`
(Section "Output arguments") for more details.
Returns
-------
nancumsum : ndarray
A new array holding the result is returned unless `out` is
specified, in which case it is returned. The result has the same
size as `a`, and the same shape as `a` if `axis` is not None
or `a` is a 1-d array.
See Also
--------
numpy.cumsum : Cumulative sum across array propagating NaNs.
isnan : Show which elements are NaN.
Examples
--------
>>> np.nancumsum(1)
array([1])
>>> np.nancumsum([1])
array([1])
>>> np.nancumsum([1, np.nan])
array([ 1., 1.])
>>> a = np.array([[1, 2], [3, np.nan]])
>>> np.nancumsum(a)
array([ 1., 3., 6., 6.])
>>> np.nancumsum(a, axis=0)
array([[ 1., 2.],
[ 4., 2.]])
>>> np.nancumsum(a, axis=1)
array([[ 1., 3.],
[ 3., 3.]])
"""
a, mask = _replace_nan(a, 0)
return np.cumsum(a, axis=axis, dtype=dtype, out=out)


def nancumprod(a, axis=None, dtype=None, out=None):
"""
Return the cumulative product of array elements over a given axis treating Not a
Numbers (NaNs) as one. The cumulative product does not change when NaNs are
encountered and leading NaNs are replaced by ones.
Ones are returned for slices that are all-NaN or empty.
.. versionadded:: 1.12.0
Parameters
----------
a : array_like
Input array.
axis : int, optional
Axis along which the cumulative product is computed. By default
the input is flattened.
dtype : dtype, optional
Type of the returned array, as well as of the accumulator in which
the elements are multiplied. If *dtype* is not specified, it
defaults to the dtype of `a`, unless `a` has an integer dtype with
a precision less than that of the default platform integer. In
that case, the default platform integer is used instead.
out : ndarray, optional
Alternative output array in which to place the result. It must
have the same shape and buffer length as the expected output
but the type of the resulting values will be cast if necessary.
Returns
-------
nancumprod : ndarray
A new array holding the result is returned unless `out` is
specified, in which case it is returned.
See Also
--------
numpy.cumprod : Cumulative product across array propagating NaNs.
isnan : Show which elements are NaN.
Examples
--------
>>> np.nancumprod(1)
array([1])
>>> np.nancumprod([1])
array([1])
>>> np.nancumprod([1, np.nan])
array([ 1., 1.])
>>> a = np.array([[1, 2], [3, np.nan]])
>>> np.nancumprod(a)
array([ 1., 2., 6., 6.])
>>> np.nancumprod(a, axis=0)
array([[ 1., 2.],
[ 3., 2.]])
>>> np.nancumprod(a, axis=1)
array([[ 1., 2.],
[ 3., 3.]])
"""
a, mask = _replace_nan(a, 1)
return np.cumprod(a, axis=axis, dtype=dtype, out=out)
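Both shims delegate the masking step to `_replace_nan`, which is defined earlier in npcompat.py and is not part of this hunk. A rough behavioural sketch of that helper (adapted for illustration, not the verbatim implementation):

import numpy as np

def _replace_nan(a, val):
    # Return `a` with NaNs replaced by `val`, together with a boolean mask
    # of where the NaNs were (mask is None for non-float dtypes).
    a = np.asanyarray(a)
    if not issubclass(a.dtype.type, np.inexact):
        return a, None
    mask = np.isnan(a)
    return np.where(mask, val, a), mask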
78 changes: 56 additions & 22 deletions xarray/core/ops.py
@@ -45,10 +45,11 @@
REDUCE_METHODS = ['all', 'any']
NAN_REDUCE_METHODS = ['argmax', 'argmin', 'max', 'min', 'mean', 'prod', 'sum',
'std', 'var', 'median']
NAN_CUM_METHODS = ['cumsum', 'cumprod']
BOTTLENECK_ROLLING_METHODS = {'move_sum': 'sum', 'move_mean': 'mean',
'move_std': 'std', 'move_min': 'min',
'move_max': 'max'}
# TODO: wrap cumprod/cumsum, take, dot, sort
# TODO: wrap take, dot, sort


def _dask_or_eager_func(name, eager_module=np, list_of_args=False,
@@ -201,6 +202,30 @@ def func(self, *args, **kwargs):
func.__doc__ = f.__doc__
return func

_CUM_DOCSTRING_TEMPLATE = \
"""Apply `{name}` along some dimension of {cls}.
Parameters
----------
{extra_args}
skipna : bool, optional
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
keep_attrs : bool, optional
If True, the attributes (`attrs`) will be copied from the original
object to the new one. If False (default), the new object will be
returned without attributes.
**kwargs : dict
Additional keyword arguments passed on to `{name}`.
Returns
-------
cumvalue : {cls}
New {cls} object with `{name}` applied to its data along the
indicated dimension.
"""

_REDUCE_DOCSTRING_TEMPLATE = \
"""Reduce this {cls}'s data by applying `{name}` along some
@@ -274,7 +299,9 @@ def _ignore_warnings_if(condition):
yield


def _create_nan_agg_method(name, numeric_only=False, coerce_strings=False):
def _create_nan_agg_method(name, numeric_only=False, np_compat=False,
no_bottleneck=False, coerce_strings=False,
keep_dims=False):
def f(values, axis=None, skipna=None, **kwargs):
# ignore keyword args inserted by np.mean and other numpy aggregators
# automatically:
@@ -292,14 +319,17 @@ def f(values, axis=None, skipna=None, **kwargs):
'skipna=True not yet implemented for %s with dtype %s'
% (name, values.dtype))
nanname = 'nan' + name
if isinstance(axis, tuple) or not values.dtype.isnative:
if isinstance(axis, tuple) or not values.dtype.isnative or no_bottleneck:
# bottleneck can't handle multiple axis arguments or non-native
# endianness
eager_module = np
if np_compat:
eager_module = npcompat
else:
eager_module = np
else:
eager_module = bn
func = _dask_or_eager_func(nanname, eager_module)
using_numpy_nan_func = eager_module is np
using_numpy_nan_func = eager_module is np or eager_module is npcompat
else:
func = _dask_or_eager_func(name)
using_numpy_nan_func = False
@@ -312,10 +342,12 @@ def f(values, axis=None, skipna=None, **kwargs):
else:
assert using_numpy_nan_func
msg = ('%s is not available with skipna=False with the '
'installed version of numpy; upgrade to numpy 1.9 '
'installed version of numpy; upgrade to numpy 1.12 '
'or newer to use skipna=True or skipna=None' % name)
raise NotImplementedError(msg)
f.numeric_only = numeric_only
f.keep_dims = keep_dims
f.__name__ = name
return f


@@ -328,28 +360,18 @@ def f(values, axis=None, skipna=None, **kwargs):
std = _create_nan_agg_method('std', numeric_only=True)
var = _create_nan_agg_method('var', numeric_only=True)
median = _create_nan_agg_method('median', numeric_only=True)

prod = _create_nan_agg_method('prod', numeric_only=True, np_compat=True,
no_bottleneck=True)
cumprod = _create_nan_agg_method('cumprod', numeric_only=True, np_compat=True,
no_bottleneck=True, keep_dims=True)
cumsum = _create_nan_agg_method('cumsum', numeric_only=True, np_compat=True,
no_bottleneck=True, keep_dims=True)

_fail_on_dask_array_input_skipna = partial(
_fail_on_dask_array_input,
msg='%r with skipna=True is not yet implemented on dask arrays')


_prod = _dask_or_eager_func('prod')


def prod(values, axis=None, skipna=None, **kwargs):
if skipna or (skipna is None and values.dtype.kind == 'f'):
if values.dtype.kind not in ['i', 'f']:
raise NotImplementedError(
'skipna=True not yet implemented for prod with dtype %s'
% values.dtype)
_fail_on_dask_array_input_skipna(values)
return npcompat.nanprod(values, axis=axis, **kwargs)
return _prod(values, axis=axis, **kwargs)
prod.numeric_only = True


def first(values, axis, skipna=None):
"""Return the first non-NA elements in this array along the given axis
"""
@@ -384,6 +406,17 @@ def inject_reduce_methods(cls):
extra_args=cls._reduce_extra_args_docstring)
setattr(cls, name, func)

def inject_cum_methods(cls):
methods = ([(name, globals()[name], True) for name in NAN_CUM_METHODS])
for name, f, include_skipna in methods:
numeric_only = getattr(f, 'numeric_only', False)
func = cls._reduce_method(f, include_skipna, numeric_only)
func.__name__ = name
func.__doc__ = _CUM_DOCSTRING_TEMPLATE.format(
name=name, cls=cls.__name__,
extra_args=cls._cum_extra_args_docstring)
setattr(cls, name, func)


def op_str(name):
return '__%s__' % name
@@ -454,6 +487,7 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):
setattr(cls, name, _values_method_wrapper(name))

inject_reduce_methods(cls)
inject_cum_methods(cls)


def inject_bottleneck_rolling_methods(cls):
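To make the dispatch above concrete: `cumsum` and `cumprod` are created with np_compat=True and no_bottleneck=True, so with the default skipna=None on float data they route to npcompat.nancumsum/nancumprod, while skipna=False falls back to numpy's NaN-propagating functions. An illustrative session (outputs shown are the expected values, not captured from this commit):

>>> import numpy as np
>>> from xarray.core import ops
>>> ops.cumsum(np.array([1.0, np.nan, 2.0]), axis=0)
array([ 1.,  1.,  3.])
>>> ops.cumsum(np.array([1.0, np.nan, 2.0]), axis=0, skipna=False)
array([  1.,  nan,  nan])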
17 changes: 13 additions & 4 deletions xarray/core/variable.py
@@ -896,15 +896,24 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
if dim is not None and axis is not None:
raise ValueError("cannot supply both 'axis' and 'dim' arguments")

if getattr(func, 'keep_dims', False):
if dim is None and axis is None:
raise ValueError("must supply either single 'dim' or 'axis' argument to %s"
% (func.__name__))

if dim is not None:
axis = self.get_axis_num(dim)
data = func(self.data if allow_lazy else self.values,
axis=axis, **kwargs)

removed_axes = (range(self.ndim) if axis is None
else np.atleast_1d(axis) % self.ndim)
dims = [dim for n, dim in enumerate(self.dims)
if n not in removed_axes]
if getattr(data, 'shape', ()) == self.shape:
dims = self.dims
else:
removed_axes = (range(self.ndim) if axis is None
else np.atleast_1d(axis) % self.ndim)
dims = [adim for n, adim in enumerate(self.dims)
if n not in removed_axes]


attrs = self._attrs if keep_attrs else None

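The keep_dims branch above means shape-preserving aggregations such as cumsum keep their dimension labels, and they must be given an explicit dim or axis. A brief sketch of the resulting behaviour (names and data are illustrative):

>>> import numpy as np
>>> import xarray as xr
>>> v = xr.Variable(('x', 'y'), np.array([[1.0, 2.0], [3.0, np.nan]]))
>>> v.cumsum(dim='y').dims     # output shape matches input, so dims are kept
('x', 'y')
>>> v.sum(dim='y').dims        # an ordinary reduction drops the dimension
('x',)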