diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 8755abe642068..79027a69bb81a 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -581,6 +581,7 @@ Deprecations it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). - :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) - The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`) +- :meth:`DataFrame.filter` and :meth:`Series.filter` are deprecated. (:issue:`26642`) .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b896721469f1f..a8bea0c32b878 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4557,7 +4557,11 @@ def filter( """ Subset the dataframe rows or columns according to the specified index labels. - Note that this routine does not filter a dataframe on its + .. deprecated:: 1.0 + Use .loc instead, e.g. for regular expressions + use .loc(regex=True)[:, "^col_"] + + Note that this method does not filter a dataframe on its contents. The filter is applied to the labels of the index. Parameters @@ -4612,6 +4616,13 @@ def filter( one two three rabbit 4 5 6 """ + warnings.warn( + "DataFrame/Series.filter is deprecated " + "and will be removed in a future version", + FutureWarning, + stacklevel=2, + ) + nkw = com.count_not_none(items, like, regex) if nkw > 1: raise TypeError( diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index d6ef3a7600abb..407393074c24f 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -806,136 +806,9 @@ def test_align_series_combinations(self): tm.assert_series_equal(res1, exp2) tm.assert_frame_equal(res2, exp1) - def test_filter(self, float_frame, float_string_frame): - # Items - filtered = float_frame.filter(["A", "B", "E"]) - assert len(filtered.columns) == 2 - assert "E" not in filtered - - filtered = float_frame.filter(["A", "B", "E"], axis="columns") - assert len(filtered.columns) == 2 - assert "E" not in filtered - - # Other axis - idx = float_frame.index[0:4] - filtered = float_frame.filter(idx, axis="index") - expected = float_frame.reindex(index=idx) - tm.assert_frame_equal(filtered, expected) - - # like - fcopy = float_frame.copy() - fcopy["AA"] = 1 - - filtered = fcopy.filter(like="A") - assert len(filtered.columns) == 2 - assert "AA" in filtered - - # like with ints in column names - df = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"]) - filtered = df.filter(like="_") - assert len(filtered.columns) == 2 - - # regex with ints in column names - # from PR #10384 - df = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"]) - expected = DataFrame( - 0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object) - ) - filtered = df.filter(regex="^[0-9]+$") - tm.assert_frame_equal(filtered, expected) - - expected = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"]) - # shouldn't remove anything - filtered = expected.filter(regex="^[0-9]+$") - tm.assert_frame_equal(filtered, expected) - - # pass in None - with pytest.raises(TypeError, match="Must pass"): - float_frame.filter() - with pytest.raises(TypeError, match="Must pass"): - float_frame.filter(items=None) - with pytest.raises(TypeError, match="Must pass"): - float_frame.filter(axis=1) - - # test mutually exclusive arguments - with pytest.raises(TypeError, match="mutually exclusive"): - float_frame.filter(items=["one", "three"], regex="e$", like="bbi") - with pytest.raises(TypeError, match="mutually exclusive"): - float_frame.filter(items=["one", "three"], regex="e$", axis=1) - with pytest.raises(TypeError, match="mutually exclusive"): - float_frame.filter(items=["one", "three"], regex="e$") - with pytest.raises(TypeError, match="mutually exclusive"): - float_frame.filter(items=["one", "three"], like="bbi", axis=0) - with pytest.raises(TypeError, match="mutually exclusive"): - float_frame.filter(items=["one", "three"], like="bbi") - - # objects - filtered = float_string_frame.filter(like="foo") - assert "foo" in filtered - - # unicode columns, won't ascii-encode - df = float_frame.rename(columns={"B": "\u2202"}) - filtered = df.filter(like="C") - assert "C" in filtered - - def test_filter_regex_search(self, float_frame): - fcopy = float_frame.copy() - fcopy["AA"] = 1 - - # regex - filtered = fcopy.filter(regex="[A]+") - assert len(filtered.columns) == 2 - assert "AA" in filtered - - # doesn't have to be at beginning - df = DataFrame( - {"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]} - ) - - result = df.filter(regex="BB") - exp = df[[x for x in df.columns if "BB" in x]] - tm.assert_frame_equal(result, exp) - - @pytest.mark.parametrize( - "name,expected", - [ - ("a", DataFrame({"a": [1, 2]})), - ("a", DataFrame({"a": [1, 2]})), - ("あ", DataFrame({"あ": [3, 4]})), - ], - ) - def test_filter_unicode(self, name, expected): - # GH13101 - df = DataFrame({"a": [1, 2], "あ": [3, 4]}) - - tm.assert_frame_equal(df.filter(like=name), expected) - tm.assert_frame_equal(df.filter(regex=name), expected) - - @pytest.mark.parametrize("name", ["a", "a"]) - def test_filter_bytestring(self, name): - # GH13101 - df = DataFrame({b"a": [1, 2], b"b": [3, 4]}) - expected = DataFrame({b"a": [1, 2]}) - - tm.assert_frame_equal(df.filter(like=name), expected) - tm.assert_frame_equal(df.filter(regex=name), expected) - - def test_filter_corner(self): - empty = DataFrame() - - result = empty.filter([]) - tm.assert_frame_equal(result, empty) - - result = empty.filter(like="foo") - tm.assert_frame_equal(result, empty) - - def test_filter_regex_non_string(self): - # GH#5798 trying to filter on non-string columns should drop, - # not raise - df = pd.DataFrame(np.random.random((3, 2)), columns=["STRING", 123]) - result = df.filter(regex="STRING") - expected = df[["STRING"]] - tm.assert_frame_equal(result, expected) + def test_filter_deprecated(self, float_frame): + with tm.assert_produces_warning(FutureWarning): + float_frame.filter(["A", "B", "E"]) def test_take(self, float_frame): # homogeneous