diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b4f8669ba9d..63139231d75 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -7137,7 +7137,7 @@ def explode(self, column, ignore_index=False): return super()._explode(column, ignore_index) def pct_change( - self, periods=1, fill_method="ffill", limit=None, freq=None + self, periods=1, fill_method=no_default, limit=no_default, freq=None ): """ Calculates the percent change between sequential elements @@ -7149,9 +7149,15 @@ def pct_change( Periods to shift for forming percent change. fill_method : str, default 'ffill' How to handle NAs before computing percent changes. + + .. deprecated:: 23.12 + All options of `fill_method` are deprecated except `fill_method=None`. limit : int, optional The number of consecutive NAs to fill before stopping. Not yet implemented. + + .. deprecated:: 23.12 + `limit` is deprecated. freq : str, optional Increment to use from time series API. Not yet implemented. @@ -7160,16 +7166,38 @@ def pct_change( ------- DataFrame """ - if limit is not None: + if limit is not no_default: raise NotImplementedError("limit parameter not supported yet.") if freq is not None: raise NotImplementedError("freq parameter not supported yet.") - elif fill_method not in {"ffill", "pad", "bfill", "backfill"}: + elif fill_method not in { + no_default, + None, + "ffill", + "pad", + "bfill", + "backfill", + }: raise ValueError( - "fill_method must be one of 'ffill', 'pad', " + "fill_method must be one of None, 'ffill', 'pad', " "'bfill', or 'backfill'." ) + if fill_method not in (no_default, None) or limit is not no_default: + # Do not remove until pandas 3.0 support is added. + warnings.warn( + "The 'fill_method' and 'limit' keywords in " + f"{type(self).__name__}.pct_change are deprecated and will be " + "removed in a future version. Either fill in any non-leading NA values prior " + "to calling pct_change or specify 'fill_method=None' to not fill NA " + "values.", + FutureWarning, + ) + if fill_method is no_default: + fill_method = "ffill" + if limit is no_default: + limit = None + with warnings.catch_warnings(): warnings.simplefilter("ignore") data = self.fillna(method=fill_method, limit=limit) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index bad5106970e..414a86470f0 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -21,6 +21,7 @@ from cudf._lib.sort import segmented_sort_by_key from cudf._lib.types import size_type_dtype from cudf._typing import AggType, DataFrameOrSeries, MultiColumnAggType +from cudf.api.extensions import no_default from cudf.api.types import is_bool_dtype, is_float_dtype, is_list_like from cudf.core.abc import Serializable from cudf.core.column.column import ColumnBase, arange, as_column @@ -2286,7 +2287,12 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): @_cudf_nvtx_annotate def pct_change( - self, periods=1, fill_method="ffill", axis=0, limit=None, freq=None + self, + periods=1, + fill_method=no_default, + axis=0, + limit=no_default, + freq=None, ): """ Calculates the percent change between sequential elements @@ -2298,9 +2304,15 @@ def pct_change( Periods to shift for forming percent change. fill_method : str, default 'ffill' How to handle NAs before computing percent changes. + + .. deprecated:: 23.12 + All options of `fill_method` are deprecated except `fill_method=None`. limit : int, optional The number of consecutive NAs to fill before stopping. Not yet implemented. + + .. deprecated:: 23.12 + `limit` is deprecated. freq : str, optional Increment to use from time series API. Not yet implemented. @@ -2312,25 +2324,31 @@ def pct_change( """ if not axis == 0: raise NotImplementedError("Only axis=0 is supported.") - if limit is not None: + if limit is not no_default: raise NotImplementedError("limit parameter not supported yet.") if freq is not None: raise NotImplementedError("freq parameter not supported yet.") - elif fill_method not in {"ffill", "pad", "bfill", "backfill"}: + elif fill_method not in {no_default, None, "ffill", "bfill"}: raise ValueError( - "fill_method must be one of 'ffill', 'pad', " - "'bfill', or 'backfill'." + "fill_method must be one of 'ffill', or" "'bfill'." ) - if fill_method in ("pad", "backfill"): - alternative = "ffill" if fill_method == "pad" else "bfill" - # Do not remove until pandas 2.0 support is added. + if fill_method not in (no_default, None) or limit is not no_default: + # Do not remove until pandas 3.0 support is added. warnings.warn( - f"{fill_method} is deprecated and will be removed in a future " - f"version. Use f{alternative} instead.", + "The 'fill_method' keyword being not None and the 'limit' keywords in " + f"{type(self).__name__}.pct_change are deprecated and will be " + "removed in a future version. Either fill in any non-leading NA values prior " + "to calling pct_change or specify 'fill_method=None' to not fill NA " + "values.", FutureWarning, ) + if fill_method in (no_default, None): + fill_method = "ffill" + if limit is no_default: + limit = None + with warnings.catch_warnings(): warnings.simplefilter("ignore") filled = self.fillna(method=fill_method, limit=limit) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index f0323d6f55b..f9987569070 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -3569,7 +3569,7 @@ def explode(self, ignore_index=False): @_cudf_nvtx_annotate def pct_change( - self, periods=1, fill_method="ffill", limit=None, freq=None + self, periods=1, fill_method=no_default, limit=no_default, freq=None ): """ Calculates the percent change between sequential elements @@ -3581,9 +3581,15 @@ def pct_change( Periods to shift for forming percent change. fill_method : str, default 'ffill' How to handle NAs before computing percent changes. + + .. deprecated:: 23.12 + All options of `fill_method` are deprecated except `fill_method=None`. limit : int, optional The number of consecutive NAs to fill before stopping. Not yet implemented. + + .. deprecated:: 23.12 + `limit` is deprecated. freq : str, optional Increment to use from time series API. Not yet implemented. @@ -3592,15 +3598,37 @@ def pct_change( ------- Series """ - if limit is not None: + if limit is not no_default: raise NotImplementedError("limit parameter not supported yet.") if freq is not None: raise NotImplementedError("freq parameter not supported yet.") - elif fill_method not in {"ffill", "pad", "bfill", "backfill"}: + elif fill_method not in { + no_default, + None, + "ffill", + "pad", + "bfill", + "backfill", + }: raise ValueError( - "fill_method must be one of 'ffill', 'pad', " + "fill_method must be one of None, 'ffill', 'pad', " "'bfill', or 'backfill'." ) + if fill_method not in (no_default, None) or limit is not no_default: + # Do not remove until pandas 3.0 support is added. + warnings.warn( + "The 'fill_method' and 'limit' keywords in " + f"{type(self).__name__}.pct_change are deprecated and will be " + "removed in a future version. Either fill in any non-leading NA values prior " + "to calling pct_change or specify 'fill_method=None' to not fill NA " + "values.", + FutureWarning, + ) + + if fill_method is no_default: + fill_method = "ffill" + if limit is no_default: + limit = None with warnings.catch_warnings(): warnings.simplefilter("ignore") diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 9192e5e7ca0..9a51ef5ed57 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -25,8 +25,10 @@ PANDAS_GE_134, PANDAS_GE_150, PANDAS_GE_200, + PANDAS_GE_210, PANDAS_LT_140, ) +from cudf.api.extensions import no_default from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column import column from cudf.testing import _utils as utils @@ -9896,13 +9898,20 @@ def test_dataframe_rename_duplicate_column(): ], ) @pytest.mark.parametrize("periods", [-5, -2, 0, 2, 5]) -@pytest.mark.parametrize("fill_method", ["ffill", "bfill", "pad", "backfill"]) +@pytest.mark.parametrize( + "fill_method", ["ffill", "bfill", "pad", "backfill", no_default] +) def test_dataframe_pct_change(data, periods, fill_method): gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() - actual = gdf.pct_change(periods=periods, fill_method=fill_method) - expected = pdf.pct_change(periods=periods, fill_method=fill_method) + with expect_warning_if(fill_method is not no_default): + actual = gdf.pct_change(periods=periods, fill_method=fill_method) + with expect_warning_if( + PANDAS_GE_210 + and (fill_method is not no_default or pdf.isna().any().any()) + ): + expected = pdf.pct_change(periods=periods, fill_method=fill_method) assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 65c48c1b12d..fd0f7863d2b 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -19,6 +19,7 @@ import cudf from cudf import DataFrame, Series +from cudf.api.extensions import no_default from cudf.core._compat import ( PANDAS_GE_150, PANDAS_LT_140, @@ -3062,17 +3063,25 @@ def test_groupby_transform_maintain_index(by): ], ) @pytest.mark.parametrize("periods", [-5, -2, 0, 2, 5]) -@pytest.mark.parametrize("fill_method", ["ffill", "bfill"]) +@pytest.mark.parametrize("fill_method", ["ffill", "bfill", no_default, None]) def test_groupby_pct_change(data, gkey, periods, fill_method): gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() - actual = gdf.groupby(gkey).pct_change( - periods=periods, fill_method=fill_method - ) - expected = pdf.groupby(gkey).pct_change( - periods=periods, fill_method=fill_method - ) + with expect_warning_if(fill_method not in (no_default, None)): + actual = gdf.groupby(gkey).pct_change( + periods=periods, fill_method=fill_method + ) + with expect_warning_if( + PANDAS_GE_210 + and ( + fill_method not in (no_default, None) + or (fill_method is not None and pdf.isna().any().any()) + ) + ): + expected = pdf.groupby(gkey).pct_change( + periods=periods, fill_method=fill_method + ) assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index 8eae74a34f7..41fac49ea83 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -8,6 +8,7 @@ import pytest import cudf +from cudf.api.extensions import no_default from cudf.datasets import randomdata from cudf.testing._utils import ( _create_cudf_series_float64_default, @@ -16,6 +17,7 @@ assert_exceptions_equal, expect_warning_if, ) +from cudf.core._compat import PANDAS_GE_210 params_dtypes = [np.int32, np.uint32, np.float32, np.float64] methods = ["min", "max", "sum", "mean", "var", "std"] @@ -356,14 +358,24 @@ def test_series_median(dtype, num_na): ], ) @pytest.mark.parametrize("periods", range(-5, 5)) -@pytest.mark.parametrize("fill_method", ["ffill", "bfill", "pad", "backfill"]) +@pytest.mark.parametrize( + "fill_method", ["ffill", "bfill", "pad", "backfill", no_default, None] +) def test_series_pct_change(data, periods, fill_method): cs = cudf.Series(data) ps = cs.to_pandas() if np.abs(periods) <= len(cs): - got = cs.pct_change(periods=periods, fill_method=fill_method) - expected = ps.pct_change(periods=periods, fill_method=fill_method) + with expect_warning_if(fill_method not in (no_default, None)): + got = cs.pct_change(periods=periods, fill_method=fill_method) + with expect_warning_if( + PANDAS_GE_210 + and ( + fill_method not in (no_default, None) + or (fill_method is not None and ps.isna().any()) + ) + ): + expected = ps.pct_change(periods=periods, fill_method=fill_method) np.testing.assert_array_almost_equal( got.to_numpy(na_value=np.nan), expected )