Deprecate NDFrame.filter

pandas-dev · Dec 31, 2019 · 617e231
1 parent 844dc4a
commit 617e231
Show file tree

Hide file tree

Showing 3 changed files with 16 additions and 131 deletions.
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -581,6 +581,7 @@ Deprecations
   it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`).
 - :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
 - The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
+- :meth:`DataFrame.filter` and :meth:`Series.filter` are deprecated and will be removed in a future version (:issue:`26642`)
 
 .. _whatsnew_1000.prior_deprecations:
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -4557,7 +4557,11 @@ def filter(
         """
         Subset the dataframe rows or columns according to the specified index labels.
 
-        Note that this routine does not filter a dataframe on its
+        .. deprecated:: 1.0.0
+            Use ``.loc`` instead, e.g. for regular expressions
+            use ``.loc(regex=True)[:, "^col_"]``.
+
+        Note that this method does not filter a dataframe on its
         contents. The filter is applied to the labels of the index.
 
         Parameters
@@ -4612,6 +4616,13 @@ def filter(
                  one  two  three
         rabbit    4    5      6
         """
+        warnings.warn(
+            "DataFrame/Series.filter is deprecated "
+            "and will be removed in a future version",
+            FutureWarning,
+            stacklevel=2,
+        )
+
         nkw = com.count_not_none(items, like, regex)
         if nkw > 1:
             raise TypeError(

diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py
@@ -806,136 +806,9 @@ def test_align_series_combinations(self):
         tm.assert_series_equal(res1, exp2)
         tm.assert_frame_equal(res2, exp1)
 
-    def test_filter(self, float_frame, float_string_frame):
-        # Items
-        filtered = float_frame.filter(["A", "B", "E"])
-        assert len(filtered.columns) == 2
-        assert "E" not in filtered
-
-        filtered = float_frame.filter(["A", "B", "E"], axis="columns")
-        assert len(filtered.columns) == 2
-        assert "E" not in filtered
-
-        # Other axis
-        idx = float_frame.index[0:4]
-        filtered = float_frame.filter(idx, axis="index")
-        expected = float_frame.reindex(index=idx)
-        tm.assert_frame_equal(filtered, expected)
-
-        # like
-        fcopy = float_frame.copy()
-        fcopy["AA"] = 1
-
-        filtered = fcopy.filter(like="A")
-        assert len(filtered.columns) == 2
-        assert "AA" in filtered
-
-        # like with ints in column names
-        df = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"])
-        filtered = df.filter(like="_")
-        assert len(filtered.columns) == 2
-
-        # regex with ints in column names
-        # from PR #10384
-        df = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"])
-        expected = DataFrame(
-            0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)
-        )
-        filtered = df.filter(regex="^[0-9]+$")
-        tm.assert_frame_equal(filtered, expected)
-
-        expected = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"])
-        # shouldn't remove anything
-        filtered = expected.filter(regex="^[0-9]+$")
-        tm.assert_frame_equal(filtered, expected)
-
-        # pass in None
-        with pytest.raises(TypeError, match="Must pass"):
-            float_frame.filter()
-        with pytest.raises(TypeError, match="Must pass"):
-            float_frame.filter(items=None)
-        with pytest.raises(TypeError, match="Must pass"):
-            float_frame.filter(axis=1)
-
-        # test mutually exclusive arguments
-        with pytest.raises(TypeError, match="mutually exclusive"):
-            float_frame.filter(items=["one", "three"], regex="e$", like="bbi")
-        with pytest.raises(TypeError, match="mutually exclusive"):
-            float_frame.filter(items=["one", "three"], regex="e$", axis=1)
-        with pytest.raises(TypeError, match="mutually exclusive"):
-            float_frame.filter(items=["one", "three"], regex="e$")
-        with pytest.raises(TypeError, match="mutually exclusive"):
-            float_frame.filter(items=["one", "three"], like="bbi", axis=0)
-        with pytest.raises(TypeError, match="mutually exclusive"):
-            float_frame.filter(items=["one", "three"], like="bbi")
-
-        # objects
-        filtered = float_string_frame.filter(like="foo")
-        assert "foo" in filtered
-
-        # unicode columns, won't ascii-encode
-        df = float_frame.rename(columns={"B": "\u2202"})
-        filtered = df.filter(like="C")
-        assert "C" in filtered
-
-    def test_filter_regex_search(self, float_frame):
-        fcopy = float_frame.copy()
-        fcopy["AA"] = 1
-
-        # regex
-        filtered = fcopy.filter(regex="[A]+")
-        assert len(filtered.columns) == 2
-        assert "AA" in filtered
-
-        # doesn't have to be at beginning
-        df = DataFrame(
-            {"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]}
-        )
-
-        result = df.filter(regex="BB")
-        exp = df[[x for x in df.columns if "BB" in x]]
-        tm.assert_frame_equal(result, exp)
-
-    @pytest.mark.parametrize(
-        "name,expected",
-        [
-            ("a", DataFrame({"a": [1, 2]})),
-            ("a", DataFrame({"a": [1, 2]})),
-            ("あ", DataFrame({"あ": [3, 4]})),
-        ],
-    )
-    def test_filter_unicode(self, name, expected):
-        # GH13101
-        df = DataFrame({"a": [1, 2], "あ": [3, 4]})
-
-        tm.assert_frame_equal(df.filter(like=name), expected)
-        tm.assert_frame_equal(df.filter(regex=name), expected)
-
-    @pytest.mark.parametrize("name", ["a", "a"])
-    def test_filter_bytestring(self, name):
-        # GH13101
-        df = DataFrame({b"a": [1, 2], b"b": [3, 4]})
-        expected = DataFrame({b"a": [1, 2]})
-
-        tm.assert_frame_equal(df.filter(like=name), expected)
-        tm.assert_frame_equal(df.filter(regex=name), expected)
-
-    def test_filter_corner(self):
-        empty = DataFrame()
-
-        result = empty.filter([])
-        tm.assert_frame_equal(result, empty)
-
-        result = empty.filter(like="foo")
-        tm.assert_frame_equal(result, empty)
-
-    def test_filter_regex_non_string(self):
-        # GH#5798 trying to filter on non-string columns should drop,
-        #  not raise
-        df = pd.DataFrame(np.random.random((3, 2)), columns=["STRING", 123])
-        result = df.filter(regex="STRING")
-        expected = df[["STRING"]]
-        tm.assert_frame_equal(result, expected)
+    def test_filter_deprecated(self, float_frame):
+        with tm.assert_produces_warning(FutureWarning):
+            float_frame.filter(["A", "B", "E"])
 
     def test_take(self, float_frame):
         # homogeneous