Skip to content

Commit

Permalink
REF: implement _wrap_reduction_result (#37660)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Nov 8, 2020
1 parent a347bc1 commit 6ee20b0
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 36 deletions.
7 changes: 6 additions & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Sequence, TypeVar
from typing import Any, Optional, Sequence, TypeVar

import numpy as np

Expand Down Expand Up @@ -255,6 +255,11 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
msg = f"'{type(self).__name__}' does not implement reduction '{name}'"
raise TypeError(msg)

def _wrap_reduction_result(self, axis: Optional[int], result):
    """
    Wrap the raw output of a reduction before handing it back to the caller.

    Parameters
    ----------
    axis : int or None
        Axis the reduction was taken over.
    result
        Raw value produced by the reduction.

    Returns
    -------
    ``self._box_func(result)`` when ``axis`` is None or the array is
    1-dimensional, otherwise ``self._from_backing_data(result)``.
    """
    # NOTE(review): presumably these are exactly the cases in which the
    # reduction yields a scalar rather than an array -- confirm.
    if axis is None or self.ndim == 1:
        return self._box_func(result)
    return self._from_backing_data(result)

# ------------------------------------------------------------------------

def __repr__(self) -> str:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1957,7 +1957,7 @@ def min(self, *, skipna=True, **kwargs):
return np.nan
else:
pointer = self._codes.min()
return self.categories[pointer]
return self._wrap_reduction_result(None, pointer)

@deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna")
def max(self, *, skipna=True, **kwargs):
Expand Down Expand Up @@ -1993,7 +1993,7 @@ def max(self, *, skipna=True, **kwargs):
return np.nan
else:
pointer = self._codes.max()
return self.categories[pointer]
return self._wrap_reduction_result(None, pointer)

def mode(self, dropna=True):
"""
Expand Down
16 changes: 4 additions & 12 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1283,9 +1283,7 @@ def min(self, *, axis=None, skipna=True, **kwargs):
return self._from_backing_data(result)

result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
if lib.is_scalar(result):
return self._box_func(result)
return self._from_backing_data(result)
return self._wrap_reduction_result(axis, result)

def max(self, *, axis=None, skipna=True, **kwargs):
"""
Expand Down Expand Up @@ -1316,9 +1314,7 @@ def max(self, *, axis=None, skipna=True, **kwargs):
return self._from_backing_data(result)

result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
if lib.is_scalar(result):
return self._box_func(result)
return self._from_backing_data(result)
return self._wrap_reduction_result(axis, result)

def mean(self, *, skipna=True, axis: Optional[int] = 0):
"""
Expand Down Expand Up @@ -1357,9 +1353,7 @@ def mean(self, *, skipna=True, axis: Optional[int] = 0):
result = nanops.nanmean(
self._ndarray, axis=axis, skipna=skipna, mask=self.isna()
)
if axis is None or self.ndim == 1:
return self._box_func(result)
return self._from_backing_data(result)
return self._wrap_reduction_result(axis, result)

def median(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs):
nv.validate_median((), kwargs)
Expand All @@ -1378,9 +1372,7 @@ def median(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs):
return self._from_backing_data(result)

result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
if axis is None or self.ndim == 1:
return self._box_func(result)
return self._from_backing_data(result)
return self._wrap_reduction_result(axis, result)


class DatelikeOps(DatetimeLikeArrayMixin):
Expand Down
48 changes: 30 additions & 18 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from pandas.core.dtypes.missing import isna

from pandas.core import nanops, ops
from pandas.core.array_algos import masked_reductions
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.strings.object_array import ObjectStringArrayMixin
Expand Down Expand Up @@ -275,83 +274,96 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, int]:

def any(self, *, axis=None, out=None, keepdims=False, skipna=True):
    """
    Reduce the backing ndarray with ``nanops.nanany``.

    ``out`` and ``keepdims`` are only handed to ``nv.validate_any``
    (presumably numpy-compat arguments that must keep their defaults --
    confirm).

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_any((), dict(out=out, keepdims=keepdims))
    result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna)
    # Single exit: the raw nanops value always goes through the wrapper.
    return self._wrap_reduction_result(axis, result)

def all(self, *, axis=None, out=None, keepdims=False, skipna=True):
    """
    Reduce the backing ndarray with ``nanops.nanall``.

    ``out`` and ``keepdims`` are only handed to ``nv.validate_all``
    (presumably numpy-compat arguments that must keep their defaults --
    confirm).

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_all((), dict(out=out, keepdims=keepdims))
    result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna)
    # Single exit: the raw nanops value always goes through the wrapper.
    return self._wrap_reduction_result(axis, result)

def min(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar:
    """
    Minimum of the backing ndarray, computed by ``nanops.nanmin``.

    Parameters
    ----------
    axis : int or None, default None
        Forwarded to ``nanops.nanmin`` and to the result wrapper.
    skipna : bool, default True
        Forwarded to ``nanops.nanmin``.
    **kwargs
        Only handed to ``nv.validate_min``.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_min((), kwargs)
    # self.isna() is supplied as the mask for the reduction.
    result = nanops.nanmin(
        values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
    )
    return self._wrap_reduction_result(axis, result)

def max(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar:
    """
    Maximum of the backing ndarray, computed by ``nanops.nanmax``.

    Parameters
    ----------
    axis : int or None, default None
        Forwarded to ``nanops.nanmax`` and to the result wrapper.
    skipna : bool, default True
        Forwarded to ``nanops.nanmax``.
    **kwargs
        Only handed to ``nv.validate_max``.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_max((), kwargs)
    # self.isna() is supplied as the mask for the reduction.
    result = nanops.nanmax(
        values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
    )
    return self._wrap_reduction_result(axis, result)

def sum(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar:
    """
    Sum of the backing ndarray, computed by ``nanops.nansum``.

    ``axis``, ``skipna`` and ``min_count`` are forwarded to
    ``nanops.nansum``; ``**kwargs`` is only handed to ``nv.validate_sum``.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_sum((), kwargs)
    result = nanops.nansum(
        self._ndarray, axis=axis, skipna=skipna, min_count=min_count
    )
    return self._wrap_reduction_result(axis, result)

def prod(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar:
    """
    Product of the backing ndarray, computed by ``nanops.nanprod``.

    ``axis``, ``skipna`` and ``min_count`` are forwarded to
    ``nanops.nanprod``; ``**kwargs`` is only handed to ``nv.validate_prod``.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_prod((), kwargs)
    result = nanops.nanprod(
        self._ndarray, axis=axis, skipna=skipna, min_count=min_count
    )
    return self._wrap_reduction_result(axis, result)

def mean(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
    """
    Mean of the backing ndarray, computed by ``nanops.nanmean``.

    ``dtype``, ``out`` and ``keepdims`` are only handed to
    ``nv.validate_mean``; they are not forwarded to the reduction.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_mean((), dict(dtype=dtype, out=out, keepdims=keepdims))
    result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, result)

def median(
    self, *, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True
):
    """
    Median of the backing ndarray, computed by ``nanops.nanmedian``.

    ``out``, ``overwrite_input`` and ``keepdims`` are only handed to
    ``nv.validate_median``; they are not forwarded to the reduction.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_median(
        (), dict(out=out, overwrite_input=overwrite_input, keepdims=keepdims)
    )
    result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, result)

def std(
    self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True
):
    """
    Standard deviation of the backing ndarray via ``nanops.nanstd``.

    ``dtype``, ``out`` and ``keepdims`` are only handed to
    ``nv.validate_stat_ddof_func``; ``ddof`` is forwarded to the reduction.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_stat_ddof_func(
        (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std"
    )
    result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
    return self._wrap_reduction_result(axis, result)

def var(
    self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True
):
    """
    Variance of the backing ndarray via ``nanops.nanvar``.

    ``dtype``, ``out`` and ``keepdims`` are only handed to
    ``nv.validate_stat_ddof_func``; ``ddof`` is forwarded to the reduction.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_stat_ddof_func(
        (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="var"
    )
    result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
    return self._wrap_reduction_result(axis, result)

def sem(
    self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True
):
    """
    Standard error of the mean of the backing ndarray via ``nanops.nansem``.

    ``dtype``, ``out`` and ``keepdims`` are only handed to
    ``nv.validate_stat_ddof_func``; ``ddof`` is forwarded to the reduction.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_stat_ddof_func(
        (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="sem"
    )
    result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
    return self._wrap_reduction_result(axis, result)

def kurt(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
    """
    Kurtosis of the backing ndarray via ``nanops.nankurt``.

    ``dtype``, ``out`` and ``keepdims`` are only handed to
    ``nv.validate_stat_ddof_func``; they are not forwarded to the reduction.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_stat_ddof_func(
        (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="kurt"
    )
    result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, result)

def skew(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
    """
    Skewness of the backing ndarray via ``nanops.nanskew``.

    ``dtype``, ``out`` and ``keepdims`` are only handed to
    ``nv.validate_stat_ddof_func``; they are not forwarded to the reduction.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_stat_ddof_func(
        (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="skew"
    )
    result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, result)

# ------------------------------------------------------------------------
# Additional Methods
Expand Down
17 changes: 17 additions & 0 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np

from pandas._libs import lib, missing as libmissing
from pandas._typing import Scalar
from pandas.compat.numpy import function as nv

from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype
from pandas.core.dtypes.common import (
Expand All @@ -15,6 +17,7 @@
)

from pandas.core import ops
from pandas.core.array_algos import masked_reductions
from pandas.core.arrays import IntegerArray, PandasArray
from pandas.core.arrays.integer import _IntegerDtype
from pandas.core.construction import extract_array
Expand Down Expand Up @@ -301,6 +304,20 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):

raise TypeError(f"Cannot perform reduction '{name}' with string dtype")

def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
    """
    Minimum of the array, computed by ``masked_reductions.min``.

    Parameters
    ----------
    axis : int or None, default None
        Only passed to ``self._wrap_reduction_result``; it is not
        forwarded to the reduction itself.
    skipna : bool, default True
        Forwarded to ``masked_reductions.min``.
    **kwargs
        Only handed to ``nv.validate_min``.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_min((), kwargs)
    # The reduction runs on self.to_numpy() with self.isna() as the mask.
    result = masked_reductions.min(
        values=self.to_numpy(), mask=self.isna(), skipna=skipna
    )
    return self._wrap_reduction_result(axis, result)

def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
    """
    Maximum of the array, computed by ``masked_reductions.max``.

    Parameters
    ----------
    axis : int or None, default None
        Only passed to ``self._wrap_reduction_result``; it is not
        forwarded to the reduction itself.
    skipna : bool, default True
        Forwarded to ``masked_reductions.max``.
    **kwargs
        Only handed to ``nv.validate_max``.

    Returns
    -------
    The reduction result passed through ``self._wrap_reduction_result``.
    """
    nv.validate_max((), kwargs)
    # The reduction runs on self.to_numpy() with self.isna() as the mask.
    result = masked_reductions.max(
        values=self.to_numpy(), mask=self.isna(), skipna=skipna
    )
    return self._wrap_reduction_result(axis, result)

def value_counts(self, dropna=False):
from pandas import value_counts

Expand Down
4 changes: 1 addition & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,9 +381,7 @@ def sum(
result = nanops.nansum(
self._ndarray, axis=axis, skipna=skipna, min_count=min_count
)
if axis is None or self.ndim == 1:
return self._box_func(result)
return self._from_backing_data(result)
return self._wrap_reduction_result(axis, result)

def std(
self,
Expand Down
1 change: 1 addition & 0 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ def _wrap_results(result, dtype: DtypeObj, fill_value=None):
assert not isna(fill_value), "Expected non-null fill_value"
if result == fill_value:
result = np.nan

if tz is not None:
# we get here e.g. via nanmean when we call it on a DTA[tz]
result = Timestamp(result, tz=tz)
Expand Down

0 comments on commit 6ee20b0

Please sign in to comment.