Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Deprecate NDFrame.filter #27617

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,7 @@ Deprecations
it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`).
- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
- :meth:`DataFrame.filter` and :meth:`Series.filter` are deprecated (:issue:`26642`)

.. _whatsnew_1000.prior_deprecations:

Expand Down
13 changes: 12 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4557,7 +4557,11 @@ def filter(
"""
Subset the dataframe rows or columns according to the specified index labels.

Note that this routine does not filter a dataframe on its
.. deprecated:: 1.0
Use ``.loc`` instead; e.g., for regular expressions,
use ``.loc(regex=True)[:, "^col_"]``.

Note that this method does not filter a dataframe on its
contents. The filter is applied to the labels of the index.

Parameters
Expand Down Expand Up @@ -4612,6 +4616,13 @@ def filter(
one two three
rabbit 4 5 6
"""
warnings.warn(
"DataFrame/Series.filter is deprecated "
"and will be removed in a future version",
FutureWarning,
stacklevel=2,
)

nkw = com.count_not_none(items, like, regex)
if nkw > 1:
raise TypeError(
Expand Down
133 changes: 3 additions & 130 deletions pandas/tests/frame/test_axis_select_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,136 +806,9 @@ def test_align_series_combinations(self):
tm.assert_series_equal(res1, exp2)
tm.assert_frame_equal(res2, exp1)

def test_filter(self, float_frame, float_string_frame):
# Items
filtered = float_frame.filter(["A", "B", "E"])
assert len(filtered.columns) == 2
assert "E" not in filtered

filtered = float_frame.filter(["A", "B", "E"], axis="columns")
assert len(filtered.columns) == 2
assert "E" not in filtered

# Other axis
idx = float_frame.index[0:4]
filtered = float_frame.filter(idx, axis="index")
expected = float_frame.reindex(index=idx)
tm.assert_frame_equal(filtered, expected)

# like
fcopy = float_frame.copy()
fcopy["AA"] = 1

filtered = fcopy.filter(like="A")
assert len(filtered.columns) == 2
assert "AA" in filtered

# like with ints in column names
df = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"])
filtered = df.filter(like="_")
assert len(filtered.columns) == 2

# regex with ints in column names
# from PR #10384
df = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"])
expected = DataFrame(
0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)
)
filtered = df.filter(regex="^[0-9]+$")
tm.assert_frame_equal(filtered, expected)

expected = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"])
# shouldn't remove anything
filtered = expected.filter(regex="^[0-9]+$")
tm.assert_frame_equal(filtered, expected)

# pass in None
with pytest.raises(TypeError, match="Must pass"):
float_frame.filter()
with pytest.raises(TypeError, match="Must pass"):
float_frame.filter(items=None)
with pytest.raises(TypeError, match="Must pass"):
float_frame.filter(axis=1)

# test mutually exclusive arguments
with pytest.raises(TypeError, match="mutually exclusive"):
float_frame.filter(items=["one", "three"], regex="e$", like="bbi")
with pytest.raises(TypeError, match="mutually exclusive"):
float_frame.filter(items=["one", "three"], regex="e$", axis=1)
with pytest.raises(TypeError, match="mutually exclusive"):
float_frame.filter(items=["one", "three"], regex="e$")
with pytest.raises(TypeError, match="mutually exclusive"):
float_frame.filter(items=["one", "three"], like="bbi", axis=0)
with pytest.raises(TypeError, match="mutually exclusive"):
float_frame.filter(items=["one", "three"], like="bbi")

# objects
filtered = float_string_frame.filter(like="foo")
assert "foo" in filtered

# unicode columns, won't ascii-encode
df = float_frame.rename(columns={"B": "\u2202"})
filtered = df.filter(like="C")
assert "C" in filtered

def test_filter_regex_search(self, float_frame):
fcopy = float_frame.copy()
fcopy["AA"] = 1

# regex
filtered = fcopy.filter(regex="[A]+")
assert len(filtered.columns) == 2
assert "AA" in filtered

# doesn't have to be at beginning
df = DataFrame(
{"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]}
)

result = df.filter(regex="BB")
exp = df[[x for x in df.columns if "BB" in x]]
tm.assert_frame_equal(result, exp)

@pytest.mark.parametrize(
"name,expected",
[
("a", DataFrame({"a": [1, 2]})),
("a", DataFrame({"a": [1, 2]})),
("あ", DataFrame({"あ": [3, 4]})),
],
)
def test_filter_unicode(self, name, expected):
# GH13101
df = DataFrame({"a": [1, 2], "あ": [3, 4]})

tm.assert_frame_equal(df.filter(like=name), expected)
tm.assert_frame_equal(df.filter(regex=name), expected)

@pytest.mark.parametrize("name", ["a", "a"])
def test_filter_bytestring(self, name):
# GH13101
df = DataFrame({b"a": [1, 2], b"b": [3, 4]})
expected = DataFrame({b"a": [1, 2]})

tm.assert_frame_equal(df.filter(like=name), expected)
tm.assert_frame_equal(df.filter(regex=name), expected)

def test_filter_corner(self):
empty = DataFrame()

result = empty.filter([])
tm.assert_frame_equal(result, empty)

result = empty.filter(like="foo")
tm.assert_frame_equal(result, empty)

def test_filter_regex_non_string(self):
# GH#5798 trying to filter on non-string columns should drop,
# not raise
df = pd.DataFrame(np.random.random((3, 2)), columns=["STRING", 123])
result = df.filter(regex="STRING")
expected = df[["STRING"]]
tm.assert_frame_equal(result, expected)
def test_filter_deprecated(self, float_frame):
with tm.assert_produces_warning(FutureWarning):
float_frame.filter(["A", "B", "E"])

def test_take(self, float_frame):
# homogeneous
Expand Down