From 1c351f77d3464088a6a1a59999feefd93d020d57 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Mon, 24 Sep 2018 22:57:45 +0100 Subject: [PATCH 01/10] Add Mask kwarg --- pandas/core/nanops.py | 60 +++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 7619d47cbc8f9..6f42f04909ff9 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,12 +1,16 @@ -import itertools import functools +import itertools import operator import warnings from distutils.version import LooseVersion import numpy as np + +import pandas.core.common as com from pandas import compat from pandas._libs import tslibs, lib +from pandas.core.config import get_option +from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask from pandas.core.dtypes.common import ( _get_dtype, is_float, is_scalar, @@ -17,10 +21,7 @@ is_datetime64_dtype, is_timedelta64_dtype, is_datetime_or_timedelta_dtype, is_int_or_datetime_dtype, is_any_int_dtype) -from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype -from pandas.core.config import get_option -import pandas.core.common as com _BOTTLENECK_INSTALLED = False _MIN_BOTTLENECK_VERSION = '1.0.0' @@ -200,16 +201,18 @@ def _get_fill_value(dtype, fill_value=None, fill_value_typ=None): def _get_values(values, skipna, fill_value=None, fill_value_typ=None, - isfinite=False, copy=True): + isfinite=False, copy=True, mask=None): """ utility to get the values view, mask, dtype if necessary copy and mask using the specified fill_value copy = True will force the copy """ values = com.values_from_object(values) - if isfinite: - mask = _isfinite(values) - else: - mask = isna(values) + + if mask is None: + if isfinite: + mask = _isfinite(values) + else: + mask = isna(values) dtype = values.dtype dtype_ok = _na_ok_dtype(dtype) @@ -315,19 +318,21 @@ def _na_for_min_count(values, axis): return result -def nanany(values, axis=None, skipna=True): - values, mask, dtype, _ = _get_values(values, skipna, False, copy=skipna) +def nanany(values, axis=None, skipna=True, mask=None): + values, mask, dtype, _ = _get_values(values, skipna, False, copy=skipna, + mask=mask) return values.any(axis) -def nanall(values, axis=None, skipna=True): - values, mask, dtype, _ = _get_values(values, skipna, True, copy=skipna) +def nanall(values, axis=None, skipna=True, mask=None): + values, mask, dtype, _ = _get_values(values, skipna, True, copy=skipna, + mask=mask) return values.all(axis) @disallow('M8') -def nansum(values, axis=None, skipna=True, min_count=0): - values, mask, dtype, dtype_max = _get_values(values, skipna, 0) +def nansum(values, axis=None, skipna=True, min_count=0, mask=None): + values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask) dtype_sum = dtype_max if is_float_dtype(dtype): dtype_sum = dtype @@ -341,9 +346,8 @@ def nansum(values, axis=None, skipna=True, min_count=0): @disallow('M8') @bottleneck_switch() -def nanmean(values, axis=None, skipna=True): - values, mask, dtype, dtype_max = _get_values(values, skipna, 0) - +def nanmean(values, axis=None, skipna=True, mask=None): + values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask) dtype_sum = dtype_max dtype_count = np.float64 if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype): @@ -367,15 +371,14 @@ def nanmean(values, axis=None, skipna=True): @disallow('M8') @bottleneck_switch() -def nanmedian(values, axis=None, skipna=True): - +def nanmedian(values, axis=None, skipna=True, mask=None): def get_median(x): mask = notna(x) if not skipna and not mask.all(): return np.nan return np.nanmedian(x[mask]) - values, mask, dtype, dtype_max = _get_values(values, skipna) + values, mask, dtype, dtype_max = _get_values(values, skipna, mask=mask) if not is_float_dtype(values): values = values.astype('f8') values[mask] = np.nan @@ -439,7 +442,6 @@ def nanstd(values, axis=None, skipna=True, ddof=1): @disallow('M8') @bottleneck_switch(ddof=1) def nanvar(values, axis=None, skipna=True, ddof=1): - values = com.values_from_object(values) dtype = values.dtype mask = isna(values) @@ -465,7 +467,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1): avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count if axis is not None: avg = np.expand_dims(avg, axis) - sqr = _ensure_numeric((avg - values)**2) + sqr = _ensure_numeric((avg - values) ** 2) np.putmask(sqr, mask, 0) result = sqr.sum(axis=axis, dtype=np.float64) / d @@ -520,22 +522,24 @@ def reduction(values, axis=None, skipna=True): @disallow('O') -def nanargmax(values, axis=None, skipna=True): +def nanargmax(values, axis=None, skipna=True, mask=None): """ Returns -1 in the NA case """ - values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf') + values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf', + mask=mask) result = values.argmax(axis) result = _maybe_arg_null_out(result, axis, mask, skipna) return result @disallow('O') -def nanargmin(values, axis=None, skipna=True): +def nanargmin(values, axis=None, skipna=True, mask=None): """ Returns -1 in the NA case """ - values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf') + values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf', + mask=mask) result = values.argmin(axis) result = _maybe_arg_null_out(result, axis, mask, skipna) return result @@ -636,7 +640,7 @@ def nankurt(values, axis=None, skipna=True): with np.errstate(invalid='ignore', divide='ignore'): adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) numer = count * (count + 1) * (count - 1) * m4 - denom = (count - 2) * (count - 3) * m2**2 + denom = (count - 2) * (count - 3) * m2 ** 2 # floating point error # From 9b28848d577475d127b57ae14a061e56057b2902 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 27 Sep 2018 21:58:57 +0100 Subject: [PATCH 02/10] Add doc strings and further mask kwargs --- pandas/core/nanops.py | 272 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 255 insertions(+), 17 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 6f42f04909ff9..7b9e222571540 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -319,12 +319,64 @@ def _na_for_min_count(values, axis): def nanany(values, axis=None, skipna=True, mask=None): + """ + Check if any elements along an axis evaluate to True. + + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + mask : ndarray nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2]) + >>> nanops.nanany(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([np.nan]) + >>> nanops.nanany(s) + False + """ values, mask, dtype, _ = _get_values(values, skipna, False, copy=skipna, mask=mask) return values.any(axis) def nanall(values, axis=None, skipna=True, mask=None): + """ + Check if all elements along an axis evaluate to True. + + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + mask : ndarray nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanall(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 0]) + >>> nanops.nanall(s) + False + """ values, mask, dtype, _ = _get_values(values, skipna, True, copy=skipna, mask=mask) return values.all(axis) @@ -332,6 +384,24 @@ def nanall(values, axis=None, skipna=True, mask=None): @disallow('M8') def nansum(values, axis=None, skipna=True, min_count=0, mask=None): + """ + Sum the elements along an axis ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + min_count: int + mask : ndarray nan-mask if known + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nansum(s) + 3.0 + """ values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask) dtype_sum = dtype_max if is_float_dtype(dtype): @@ -347,6 +417,23 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None): @disallow('M8') @bottleneck_switch() def nanmean(values, axis=None, skipna=True, mask=None): + """ + Compute the mean of the element along an axis ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + mask : ndarray nan-mask if known + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanmean(s) + 1.5 + """ values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask) dtype_sum = dtype_max dtype_count = np.float64 @@ -372,6 +459,21 @@ def nanmean(values, axis=None, skipna=True, mask=None): @disallow('M8') @bottleneck_switch() def nanmedian(values, axis=None, skipna=True, mask=None): + """ + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + mask : ndarray nan-mask if known + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 2]) + >>> nanops.nanmedian(s) + 2.0 + """ def get_median(x): mask = notna(x) if not skipna and not mask.all(): @@ -434,17 +536,53 @@ def _get_counts_nanvar(mask, axis, ddof, dtype=float): @disallow('M8') @bottleneck_switch(ddof=1) -def nanstd(values, axis=None, skipna=True, ddof=1): - result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof)) +def nanstd(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the standard deviation along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + mask : ndarray nan-mask if known + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanstd(s) + 1.0 + """ + result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, + mask=mask)) return _wrap_results(result, values.dtype) @disallow('M8') @bottleneck_switch(ddof=1) -def nanvar(values, axis=None, skipna=True, ddof=1): +def nanvar(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the variance along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + mask : ndarray nan-mask if known + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanvar(s) + 1.0 + """ values = com.values_from_object(values) dtype = values.dtype - mask = isna(values) + if mask is None: + mask = isna(values) if is_any_int_dtype(values): values = values.astype('f8') values[mask] = np.nan @@ -480,12 +618,31 @@ def nanvar(values, axis=None, skipna=True, ddof=1): @disallow('M8', 'm8') -def nansem(values, axis=None, skipna=True, ddof=1): +def nansem(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the standard error in the mean along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + mask : ndarray nan-mask if known + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nansem(s) + 0.5773502691896258 + """ + # This checks if non-numeric-like data is passed with numeric_only=False # and raises a TypeError otherwise - nanvar(values, axis, skipna, ddof=ddof) + nanvar(values, axis, skipna, ddof=ddof, mask=mask) - mask = isna(values) + if mask is None: + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count, _ = _get_counts_nanvar(mask, axis, ddof, values.dtype) @@ -524,7 +681,23 @@ def reduction(values, axis=None, skipna=True): @disallow('O') def nanargmax(values, axis=None, skipna=True, mask=None): """ - Returns -1 in the NA case + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + mask : ndarray nan-mask if known + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmax(s) + 4 + + Returns + -------- + The index of max value in specified axis or -1 in the NA case """ values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf', mask=mask) @@ -536,7 +709,23 @@ def nanargmax(values, axis=None, skipna=True, mask=None): @disallow('O') def nanargmin(values, axis=None, skipna=True, mask=None): """ - Returns -1 in the NA case + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + mask : ndarray nan-mask if known + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmin(s) + 0 + + Returns + -------- + The index of min value in specified axis or -1 in the NA case """ values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf', mask=mask) @@ -546,17 +735,30 @@ def nanargmin(values, axis=None, skipna=True, mask=None): @disallow('M8', 'm8') -def nanskew(values, axis=None, skipna=True): +def nanskew(values, axis=None, skipna=True, mask=None): """ Compute the sample skewness. The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G1. The algorithm computes this coefficient directly from the second and third central moment. + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + mask : ndarray nan-mask if known + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1,np.nan, 1, 2]) + >>> nanops.nanskew(s) + 1.7320508075688787 """ - values = com.values_from_object(values) - mask = isna(values) + if mask is None: + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count = _get_counts(mask, axis) @@ -605,16 +807,31 @@ def nanskew(values, axis=None, skipna=True): @disallow('M8', 'm8') -def nankurt(values, axis=None, skipna=True): - """ Compute the sample excess kurtosis. +def nankurt(values, axis=None, skipna=True, mask=None): + """ + Compute the sample excess kurtosis The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G2, computed directly from the second and fourth central moment. + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + mask : ndarray nan-mask if known + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1,np.nan, 1, 3, 2]) + >>> nanops.nankurt(s) + -1.2892561983471076 """ values = com.values_from_object(values) - mask = isna(values) + if mask is None: + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count = _get_counts(mask, axis) @@ -672,8 +889,29 @@ def nankurt(values, axis=None, skipna=True): @disallow('M8', 'm8') -def nanprod(values, axis=None, skipna=True, min_count=0): - mask = isna(values) +def nanprod(values, axis=None, skipna=True, min_count=0, mask=None): + """ + Parameters + ---------- + values : ndarray + axis : int + skipna : bool + min_count: int + mask : ndarray nan-mask if known + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan]) + >>> nanops.nanprod(s) + 6.0 + + Returns + -------- + The product of all elements on a given axis. ( NaNs are treated as 1) + """ + if mask is None: + mask = isna(values) if skipna and not is_any_int_dtype(values): values = values.copy() values[mask] = 1 From 2b4e23e84e1bfe31bdcd578ba1c2fb9a7c488d9c Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 27 Sep 2018 22:15:11 +0100 Subject: [PATCH 03/10] Add nanmin and nanmax --- pandas/core/nanops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 7b9e222571540..b73f5835d55d9 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -653,9 +653,9 @@ def nansem(values, axis=None, skipna=True, ddof=1, mask=None): def _nanminmax(meth, fill_value_typ): @bottleneck_switch() - def reduction(values, axis=None, skipna=True): + def reduction(values, axis=None, skipna=True, mask=None): values, mask, dtype, dtype_max = _get_values( - values, skipna, fill_value_typ=fill_value_typ, ) + values, skipna, fill_value_typ=fill_value_typ, mask=mask) if ((axis is not None and values.shape[axis] == 0) or values.size == 0): From 424e252fdb7f8f93b841bc244b180f07b3b2f12c Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 27 Sep 2018 22:15:25 +0100 Subject: [PATCH 04/10] Add unit test --- pandas/tests/test_nanops.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index b6c2c65fb6dce..7104187226bfe 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1,19 +1,19 @@ # -*- coding: utf-8 -*- from __future__ import division, print_function +import warnings from functools import partial -import pytest -import warnings import numpy as np +import pytest import pandas as pd -from pandas import Series, isna -from pandas.core.dtypes.common import is_integer_dtype import pandas.core.nanops as nanops -import pandas.util.testing as tm import pandas.util._test_decorators as td +import pandas.util.testing as tm +from pandas import Series, isna from pandas.compat.numpy import _np_version_under1p13 +from pandas.core.dtypes.common import is_integer_dtype use_bn = nanops._USE_BOTTLENECK @@ -1041,3 +1041,29 @@ def test_numpy_ops_np_version_under1p13(numpy_op, expected): assert result == expected else: assert result == expected + + +@pytest.mark.parametrize("operation",[ + nanops.nanany, + nanops.nanall, + nanops.nansum, + nanops.nanmean, + nanops.nanmedian, + nanops.nanstd, + nanops.nanvar, + nanops.nansem, + nanops.nanargmax, + nanops.nanargmin, + nanops.nanmax, + nanops.nanmin, + nanops.nanskew, + nanops.nankurt, + nanops.nanprod, +]) +def test_nanops_independent_of_mask_param(operation): + # GH22764 + s = pd.Series([1, 2, np.nan, 3, np.nan, 4]) + mask = s.isna() + median_expected = operation(s) + median_result = operation(s, mask=mask) + assert median_expected == median_result From 0e42c571e5f16b139cb6718e5aa726daf09662c7 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sat, 29 Sep 2018 12:23:52 +0100 Subject: [PATCH 05/10] Pep8 --- pandas/tests/test_nanops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 7104187226bfe..b06463d3c07aa 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1043,7 +1043,7 @@ def test_numpy_ops_np_version_under1p13(numpy_op, expected): assert result == expected -@pytest.mark.parametrize("operation",[ +@pytest.mark.parametrize("operation", [ nanops.nanany, nanops.nanall, nanops.nansum, From 060c7652dbcd2161b7453966aaaf017c437a3f57 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sat, 29 Sep 2018 13:01:12 +0100 Subject: [PATCH 06/10] Improve my docstrings --- pandas/core/nanops.py | 104 +++++++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 41 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index b73f5835d55d9..0af7a73e10e48 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -325,9 +325,10 @@ def nanany(values, axis=None, skipna=True, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - mask : ndarray nan-mask if known + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known Returns ------- @@ -357,9 +358,10 @@ def nanall(values, axis=None, skipna=True, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known Returns ------- @@ -390,10 +392,11 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - min_count: int - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known Examples -------- @@ -423,9 +426,10 @@ def nanmean(values, axis=None, skipna=True, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known Examples -------- @@ -463,9 +467,10 @@ def nanmedian(values, axis=None, skipna=True, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known Examples -------- @@ -543,9 +548,13 @@ def nanstd(values, axis=None, skipna=True, ddof=1, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known Examples -------- @@ -568,9 +577,13 @@ def nanvar(values, axis=None, skipna=True, ddof=1, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known Examples -------- @@ -625,9 +638,13 @@ def nansem(values, axis=None, skipna=True, ddof=1, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known Examples -------- @@ -684,9 +701,10 @@ def nanargmax(values, axis=None, skipna=True, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known Examples -------- @@ -712,9 +730,10 @@ def nanargmin(values, axis=None, skipna=True, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known Examples -------- @@ -745,9 +764,10 @@ def nanskew(values, axis=None, skipna=True, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known Examples -------- @@ -818,9 +838,10 @@ def nankurt(values, axis=None, skipna=True, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known Examples -------- @@ -894,10 +915,11 @@ def nanprod(values, axis=None, skipna=True, min_count=0, mask=None): Parameters ---------- values : ndarray - axis : int - skipna : bool - min_count: int - mask : ndarray nan-mask if known + axis: int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known Examples -------- From 67f2020f4fbb5ab6b65c0bfcc407f7b9e3a914f0 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sun, 30 Sep 2018 18:51:26 +0100 Subject: [PATCH 07/10] Add whatsnew --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 851c1a3fbd6e9..a52e554394176 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -834,4 +834,5 @@ Other - :meth:`DataFrame.nlargest` and :meth:`DataFrame.nsmallest` now returns the correct n values when keep != 'all' also when tied on the first columns (:issue:`22752`) - :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly. - Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) +- ``nanops`` methods now take an optional `mask` parameter to avoid recalculating mask if known. (:issue:`22764`) - From 021cc6b8db91e708aee08dd49d64a3dc2a4288a3 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sun, 30 Sep 2018 18:58:05 +0100 Subject: [PATCH 08/10] Fix up docstrings --- pandas/core/nanops.py | 72 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 10 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 0af7a73e10e48..7433d50ff2548 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -391,13 +391,17 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None): Parameters ---------- - values : ndarray + values : ndarray[dtype] axis: int, optional skipna : bool, default True min_count: int, default 0 mask : ndarray[bool], optional nan-mask if known + Returns + ------- + result : dtype + Examples -------- >>> import pandas.core.nanops as nanops @@ -431,6 +435,12 @@ def nanmean(values, axis=None, skipna=True, mask=None): mask : ndarray[bool], optional nan-mask if known + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + Examples -------- >>> import pandas.core.nanops as nanops @@ -472,6 +482,12 @@ def nanmedian(values, axis=None, skipna=True, mask=None): mask : ndarray[bool], optional nan-mask if known + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + Examples -------- >>> import pandas.core.nanops as nanops @@ -556,6 +572,12 @@ def nanstd(values, axis=None, skipna=True, ddof=1, mask=None): mask : ndarray[bool], optional nan-mask if known + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + Examples -------- >>> import pandas.core.nanops as nanops @@ -585,6 +607,12 @@ def nanvar(values, axis=None, skipna=True, ddof=1, mask=None): mask : ndarray[bool], optional nan-mask if known + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + Examples -------- >>> import pandas.core.nanops as nanops @@ -646,6 +674,12 @@ def nansem(values, axis=None, skipna=True, ddof=1, mask=None): mask : ndarray[bool], optional nan-mask if known + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. + Examples -------- >>> import pandas.core.nanops as nanops @@ -706,16 +740,17 @@ def nanargmax(values, axis=None, skipna=True, mask=None): mask : ndarray[bool], optional nan-mask if known + Returns + -------- + result : int + The index of max value in specified axis or -1 in the NA case + Examples -------- >>> import pandas.core.nanops as nanops >>> s = pd.Series([1, 2, 3, np.nan, 4]) >>> nanops.nanargmax(s) 4 - - Returns - -------- - The index of max value in specified axis or -1 in the NA case """ values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf', mask=mask) @@ -735,16 +770,17 @@ def nanargmin(values, axis=None, skipna=True, mask=None): mask : ndarray[bool], optional nan-mask if known + Returns + -------- + result : int + The index of min value in specified axis or -1 in the NA case + Examples -------- >>> import pandas.core.nanops as nanops >>> s = pd.Series([1, 2, 3, np.nan, 4]) >>> nanops.nanargmin(s) 0 - - Returns - -------- - The index of min value in specified axis or -1 in the NA case """ values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf', mask=mask) @@ -769,6 +805,12 @@ def nanskew(values, axis=None, skipna=True, mask=None): mask : ndarray[bool], optional nan-mask if known + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. + Examples -------- >>> import pandas.core.nanops as nanops @@ -843,6 +885,12 @@ def nankurt(values, axis=None, skipna=True, mask=None): mask : ndarray[bool], optional nan-mask if known + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. + Examples -------- >>> import pandas.core.nanops as nanops @@ -914,13 +962,17 @@ def nanprod(values, axis=None, skipna=True, min_count=0, mask=None): """ Parameters ---------- - values : ndarray + values : ndarray[dtype] axis: int, optional skipna : bool, default True min_count: int, default 0 mask : ndarray[bool], optional nan-mask if known + Returns + ------- + result : dtype + Examples -------- >>> import pandas.core.nanops as nanops From 043ba31dea2ecfd4a28b564d53036c79786d1b20 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Tue, 2 Oct 2018 22:32:54 +0100 Subject: [PATCH 09/10] Remove whatsnew --- doc/source/whatsnew/v0.24.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 704895eae8aee..a240f1fd85dd0 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -834,5 +834,4 @@ Other - :meth:`DataFrame.nlargest` and :meth:`DataFrame.nsmallest` now returns the correct n values when keep != 'all' also when tied on the first columns (:issue:`22752`) - :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly. - Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) -- ``nanops`` methods now take an optional `mask` parameter to avoid recalculating mask if known. (:issue:`22764`) - From dacdbec03c57d531d11bd1524610e313ab49b2fd Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Tue, 2 Oct 2018 22:33:22 +0100 Subject: [PATCH 10/10] Update v0.24.0.txt --- doc/source/whatsnew/v0.24.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index a240f1fd85dd0..9b71ab656920d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -834,4 +834,3 @@ Other - :meth:`DataFrame.nlargest` and :meth:`DataFrame.nsmallest` now returns the correct n values when keep != 'all' also when tied on the first columns (:issue:`22752`) - :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly. - Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) --