Adds cumulative operators to API (pydata#812)
* Adds nancumsum, nancumprod for numpy compatibility

Needed until numpy v1.12, see
numpy/numpy#7421

* Adds nancumsum, nancumprod to xarray functions
pwolfram authored and shoyer committed Oct 3, 2016
1 parent 573541e commit 9cf107b
Showing 11 changed files with 269 additions and 30 deletions.
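For reference, the NaN handling these shims provide (mirroring the docstring examples added in xarray/core/npcompat.py below): running sums treat NaN as zero and running products treat NaN as one, so the accumulated value simply carries through missing entries. A minimal illustration, assuming the shims are imported from the compat module added in this commit:

>>> import numpy as np
>>> from xarray.core import npcompat
>>> npcompat.nancumsum([1.0, np.nan, 2.0])
array([ 1.,  1.,  3.])
>>> npcompat.nancumprod([2.0, np.nan, 3.0])
array([ 2.,  2.,  6.])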
4 changes: 4 additions & 0 deletions doc/api-hidden.rst
@@ -44,6 +44,8 @@
Dataset.round
Dataset.real
Dataset.T
Dataset.cumsum
Dataset.cumprod

DataArray.ndim
DataArray.shape
@@ -87,6 +89,8 @@
DataArray.round
DataArray.real
DataArray.T
DataArray.cumsum
DataArray.cumprod

ufuncs.angle
ufuncs.arccos
4 changes: 4 additions & 0 deletions doc/api.rst
@@ -145,6 +145,8 @@ Computation
:py:attr:`~Dataset.round`
:py:attr:`~Dataset.real`
:py:attr:`~Dataset.T`
:py:attr:`~Dataset.cumsum`
:py:attr:`~Dataset.cumprod`

**Grouped operations**:
:py:attr:`~core.groupby.DatasetGroupBy.assign`
@@ -286,6 +288,8 @@ Computation
:py:attr:`~DataArray.round`
:py:attr:`~DataArray.real`
:py:attr:`~DataArray.T`
:py:attr:`~DataArray.cumsum`
:py:attr:`~DataArray.cumprod`

**Grouped operations**:
:py:attr:`~core.groupby.DataArrayGroupBy.assign_coords`
4 changes: 4 additions & 0 deletions doc/whats-new.rst
@@ -73,6 +73,10 @@ Enhancements
which to concatenate.
By `Stephan Hoyer <https://github.com/shoyer>`_.

- Adds DataArray and Dataset methods :py:meth:`~xarray.DataArray.cumsum` and
:py:meth:`~xarray.DataArray.cumprod`. By `Phillip J. Wolfram
<https://github.com/pwolfram>`_.

Bug fixes
~~~~~~~~~
- ``groupby_bins`` now restores empty bins by default (:issue:`1019`).
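A usage sketch of the DataArray methods announced in the whats-new entry above (object names and values are illustrative, not part of the commit); with the default skipna=None, missing values are skipped for float data:

>>> import numpy as np
>>> import xarray as xr
>>> da = xr.DataArray([1.0, np.nan, 2.0], dims='x')
>>> da.cumsum('x').values
array([ 1.,  1.,  3.])
>>> da.cumsum('x', skipna=False).values
array([  1.,  nan,  nan])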
14 changes: 14 additions & 0 deletions xarray/core/common.py
@@ -29,6 +29,13 @@ def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
and 'axis' arguments can be supplied. If neither are supplied, then
`{name}` is calculated over axes."""

_cum_extra_args_docstring = \
"""dim : str or sequence of str, optional
Dimension over which to apply `{name}`.
axis : int or sequence of int, optional
Axis over which to apply `{name}`. Only one of the 'dim'
and 'axis' arguments can be supplied."""


class ImplementsDatasetReduce(object):
@classmethod
@@ -51,6 +58,13 @@ def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
Dimension(s) over which to apply `func`. By default `func` is
applied over all dimensions."""

_cum_extra_args_docstring = \
"""dim : str or sequence of str, optional
Dimension over which to apply `{name}`.
axis : int or sequence of int, optional
Axis over which to apply `{name}`. Only one of the 'dim'
and 'axis' arguments can be supplied."""


class ImplementsRollingArrayReduce(object):
@classmethod
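The `{name}` placeholder in the `_cum_extra_args_docstring` templates above is intended to be filled with the concrete method name when the cumsum/cumprod docstrings are generated. An illustrative (not verbatim) use of the template:

# Illustrative only: fill the template for a concrete method name.
from xarray.core.common import ImplementsDatasetReduce

params_doc = ImplementsDatasetReduce._cum_extra_args_docstring.format(name='cumsum')
# params_doc now describes the 'dim' and 'axis' parameters for `cumsum`.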
131 changes: 129 additions & 2 deletions xarray/core/npcompat.py
@@ -5,9 +5,9 @@
import numpy as np

try:
from numpy import broadcast_to, stack, nanprod
from numpy import broadcast_to, stack, nanprod, nancumsum, nancumprod
except ImportError: # pragma: no cover
# these functions should arrive in numpy v1.10
# these functions should arrive in numpy v1.10 to v1.12

def _maybe_view_as_subclass(original_array, new_array):
if type(original_array) is not type(new_array):
@@ -251,3 +251,130 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=0):
"""
a, mask = _replace_nan(a, 1)
return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)


def nancumsum(a, axis=None, dtype=None, out=None):
"""
Return the cumulative sum of array elements over a given axis treating Not a
Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are
encountered and leading NaNs are replaced by zeros.
Zeros are returned for slices that are all-NaN or empty.
.. versionadded:: 1.12.0
Parameters
----------
a : array_like
Input array.
axis : int, optional
Axis along which the cumulative sum is computed. The default
(None) is to compute the cumsum over the flattened array.
dtype : dtype, optional
Type of the returned array and of the accumulator in which the
elements are summed. If `dtype` is not specified, it defaults
to the dtype of `a`, unless `a` has an integer dtype with a
precision less than that of the default platform integer. In
that case, the default platform integer is used.
out : ndarray, optional
Alternative output array in which to place the result. It must
have the same shape and buffer length as the expected output
but the type will be cast if necessary. See `doc.ufuncs`
(Section "Output arguments") for more details.
Returns
-------
nancumsum : ndarray
A new array holding the result is returned unless `out` is
specified, in which case it is returned. The result has the same
size as `a`, and the same shape as `a` if `axis` is not None
or `a` is a 1-d array.
See Also
--------
numpy.cumsum : Cumulative sum across array propagating NaNs.
isnan : Show which elements are NaN.
Examples
--------
>>> np.nancumsum(1)
array([1])
>>> np.nancumsum([1])
array([1])
>>> np.nancumsum([1, np.nan])
array([ 1., 1.])
>>> a = np.array([[1, 2], [3, np.nan]])
>>> np.nancumsum(a)
array([ 1., 3., 6., 6.])
>>> np.nancumsum(a, axis=0)
array([[ 1., 2.],
[ 4., 2.]])
>>> np.nancumsum(a, axis=1)
array([[ 1., 3.],
[ 3., 3.]])
"""
a, mask = _replace_nan(a, 0)
return np.cumsum(a, axis=axis, dtype=dtype, out=out)


def nancumprod(a, axis=None, dtype=None, out=None):
"""
Return the cumulative product of array elements over a given axis treating Not a
Numbers (NaNs) as one. The cumulative product does not change when NaNs are
encountered and leading NaNs are replaced by ones.
Ones are returned for slices that are all-NaN or empty.
.. versionadded:: 1.12.0
Parameters
----------
a : array_like
Input array.
axis : int, optional
Axis along which the cumulative product is computed. By default
the input is flattened.
dtype : dtype, optional
Type of the returned array, as well as of the accumulator in which
the elements are multiplied. If *dtype* is not specified, it
defaults to the dtype of `a`, unless `a` has an integer dtype with
a precision less than that of the default platform integer. In
that case, the default platform integer is used instead.
out : ndarray, optional
Alternative output array in which to place the result. It must
have the same shape and buffer length as the expected output
but the type of the resulting values will be cast if necessary.
Returns
-------
nancumprod : ndarray
A new array holding the result is returned unless `out` is
specified, in which case it is returned.
See Also
--------
numpy.cumprod : Cumulative product across array propagating NaNs.
isnan : Show which elements are NaN.
Examples
--------
>>> np.nancumprod(1)
array([1])
>>> np.nancumprod([1])
array([1])
>>> np.nancumprod([1, np.nan])
array([ 1., 1.])
>>> a = np.array([[1, 2], [3, np.nan]])
>>> np.nancumprod(a)
array([ 1., 2., 6., 6.])
>>> np.nancumprod(a, axis=0)
array([[ 1., 2.],
[ 3., 2.]])
>>> np.nancumprod(a, axis=1)
array([[ 1., 2.],
[ 3., 3.]])
"""
a, mask = _replace_nan(a, 1)
return np.cumprod(a, axis=axis, dtype=dtype, out=out)
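Both shims delegate the masking step to `_replace_nan`, which is defined earlier in npcompat.py and is not part of this hunk. A rough behavioural sketch of that helper (adapted for illustration, not the verbatim implementation):

import numpy as np

def _replace_nan(a, val):
    # Return `a` with NaNs replaced by `val`, together with a boolean mask
    # of where the NaNs were (mask is None for non-float dtypes).
    a = np.asanyarray(a)
    if not issubclass(a.dtype.type, np.inexact):
        return a, None
    mask = np.isnan(a)
    return np.where(mask, val, a), mask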
78 changes: 56 additions & 22 deletions xarray/core/ops.py
@@ -45,10 +45,11 @@
REDUCE_METHODS = ['all', 'any']
NAN_REDUCE_METHODS = ['argmax', 'argmin', 'max', 'min', 'mean', 'prod', 'sum',
'std', 'var', 'median']
NAN_CUM_METHODS = ['cumsum', 'cumprod']
BOTTLENECK_ROLLING_METHODS = {'move_sum': 'sum', 'move_mean': 'mean',
'move_std': 'std', 'move_min': 'min',
'move_max': 'max'}
# TODO: wrap cumprod/cumsum, take, dot, sort
# TODO: wrap take, dot, sort


def _dask_or_eager_func(name, eager_module=np, list_of_args=False,
@@ -201,6 +202,30 @@ def func(self, *args, **kwargs):
func.__doc__ = f.__doc__
return func

_CUM_DOCSTRING_TEMPLATE = \
"""Apply `{name}` along some dimension of {cls}.
Parameters
----------
{extra_args}
skipna : bool, optional
If True, skip missing values (as marked by NaN). By default, only
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
keep_attrs : bool, optional
If True, the attributes (`attrs`) will be copied from the original
object to the new one. If False (default), the new object will be
returned without attributes.
**kwargs : dict
Additional keyword arguments passed on to `{name}`.
Returns
-------
cumvalue : {cls}
New {cls} object with `{name}` applied to its data along the
indicated dimension.
"""

_REDUCE_DOCSTRING_TEMPLATE = \
"""Reduce this {cls}'s data by applying `{name}` along some
@@ -274,7 +299,9 @@ def _ignore_warnings_if(condition):
yield


def _create_nan_agg_method(name, numeric_only=False, coerce_strings=False):
def _create_nan_agg_method(name, numeric_only=False, np_compat=False,
no_bottleneck=False, coerce_strings=False,
keep_dims=False):
def f(values, axis=None, skipna=None, **kwargs):
# ignore keyword args inserted by np.mean and other numpy aggregators
# automatically:
@@ -292,14 +319,17 @@ def f(values, axis=None, skipna=None, **kwargs):
'skipna=True not yet implemented for %s with dtype %s'
% (name, values.dtype))
nanname = 'nan' + name
if isinstance(axis, tuple) or not values.dtype.isnative:
if isinstance(axis, tuple) or not values.dtype.isnative or no_bottleneck:
# bottleneck can't handle multiple axis arguments or non-native
# endianness
eager_module = np
if np_compat:
eager_module = npcompat
else:
eager_module = np
else:
eager_module = bn
func = _dask_or_eager_func(nanname, eager_module)
using_numpy_nan_func = eager_module is np
using_numpy_nan_func = eager_module is np or eager_module is npcompat
else:
func = _dask_or_eager_func(name)
using_numpy_nan_func = False
@@ -312,10 +342,12 @@ def f(values, axis=None, skipna=None, **kwargs):
else:
assert using_numpy_nan_func
msg = ('%s is not available with skipna=False with the '
'installed version of numpy; upgrade to numpy 1.9 '
'installed version of numpy; upgrade to numpy 1.12 '
'or newer to use skipna=True or skipna=None' % name)
raise NotImplementedError(msg)
f.numeric_only = numeric_only
f.keep_dims = keep_dims
f.__name__ = name
return f


@@ -328,28 +360,18 @@ def f(values, axis=None, skipna=None, **kwargs):
std = _create_nan_agg_method('std', numeric_only=True)
var = _create_nan_agg_method('var', numeric_only=True)
median = _create_nan_agg_method('median', numeric_only=True)

prod = _create_nan_agg_method('prod', numeric_only=True, np_compat=True,
no_bottleneck=True)
cumprod = _create_nan_agg_method('cumprod', numeric_only=True, np_compat=True,
no_bottleneck=True, keep_dims=True)
cumsum = _create_nan_agg_method('cumsum', numeric_only=True, np_compat=True,
no_bottleneck=True, keep_dims=True)

_fail_on_dask_array_input_skipna = partial(
_fail_on_dask_array_input,
msg='%r with skipna=True is not yet implemented on dask arrays')


_prod = _dask_or_eager_func('prod')


def prod(values, axis=None, skipna=None, **kwargs):
if skipna or (skipna is None and values.dtype.kind == 'f'):
if values.dtype.kind not in ['i', 'f']:
raise NotImplementedError(
'skipna=True not yet implemented for prod with dtype %s'
% values.dtype)
_fail_on_dask_array_input_skipna(values)
return npcompat.nanprod(values, axis=axis, **kwargs)
return _prod(values, axis=axis, **kwargs)
prod.numeric_only = True


def first(values, axis, skipna=None):
"""Return the first non-NA elements in this array along the given axis
"""
@@ -384,6 +406,17 @@ def inject_reduce_methods(cls):
extra_args=cls._reduce_extra_args_docstring)
setattr(cls, name, func)

def inject_cum_methods(cls):
methods = ([(name, globals()[name], True) for name in NAN_CUM_METHODS])
for name, f, include_skipna in methods:
numeric_only = getattr(f, 'numeric_only', False)
func = cls._reduce_method(f, include_skipna, numeric_only)
func.__name__ = name
func.__doc__ = _CUM_DOCSTRING_TEMPLATE.format(
name=name, cls=cls.__name__,
extra_args=cls._cum_extra_args_docstring)
setattr(cls, name, func)


def op_str(name):
return '__%s__' % name
@@ -454,6 +487,7 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):
setattr(cls, name, _values_method_wrapper(name))

inject_reduce_methods(cls)
inject_cum_methods(cls)


def inject_bottleneck_rolling_methods(cls):
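To make the dispatch above concrete: `cumsum` and `cumprod` are created with np_compat=True and no_bottleneck=True, so with the default skipna=None on float data they route to npcompat.nancumsum/nancumprod, while skipna=False falls back to numpy's NaN-propagating functions. An illustrative session (outputs shown are the expected values, not captured from this commit):

>>> import numpy as np
>>> from xarray.core import ops
>>> ops.cumsum(np.array([1.0, np.nan, 2.0]), axis=0)
array([ 1.,  1.,  3.])
>>> ops.cumsum(np.array([1.0, np.nan, 2.0]), axis=0, skipna=False)
array([  1.,  nan,  nan])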
17 changes: 13 additions & 4 deletions xarray/core/variable.py
@@ -896,15 +896,24 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
if dim is not None and axis is not None:
raise ValueError("cannot supply both 'axis' and 'dim' arguments")

if getattr(func, 'keep_dims', False):
if dim is None and axis is None:
raise ValueError("must supply either single 'dim' or 'axis' argument to %s"
% (func.__name__))

if dim is not None:
axis = self.get_axis_num(dim)
data = func(self.data if allow_lazy else self.values,
axis=axis, **kwargs)

removed_axes = (range(self.ndim) if axis is None
else np.atleast_1d(axis) % self.ndim)
dims = [dim for n, dim in enumerate(self.dims)
if n not in removed_axes]
if getattr(data, 'shape', ()) == self.shape:
dims = self.dims
else:
removed_axes = (range(self.ndim) if axis is None
else np.atleast_1d(axis) % self.ndim)
dims = [adim for n, adim in enumerate(self.dims)
if n not in removed_axes]


attrs = self._attrs if keep_attrs else None

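The keep_dims branch above means shape-preserving aggregations such as cumsum keep their dimension labels, and they must be given an explicit dim or axis. A brief sketch of the resulting behaviour (names and data are illustrative):

>>> import numpy as np
>>> import xarray as xr
>>> v = xr.Variable(('x', 'y'), np.array([[1.0, 2.0], [3.0, np.nan]]))
>>> v.cumsum(dim='y').dims     # output shape matches input, so dims are kept
('x', 'y')
>>> v.sum(dim='y').dims        # an ordinary reduction drops the dimension
('x',)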