Skip to content

Commit

Permalink
Deprecate NDFrame.filter
Browse files Browse the repository at this point in the history
  • Loading branch information
topper-123 committed Oct 7, 2019
1 parent af498fe commit da9b64e
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 123 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ Deprecations
- ``Index.set_value`` has been deprecated. For a given index ``idx``, array ``arr``,
value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)``
is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`).
- :meth:`DataFrame.filter` and :meth:`Series.filter` are deprecated. Use ``.loc`` instead (:issue:`26642`).
-

.. _whatsnew_1000.prior_deprecations:
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4638,7 +4638,11 @@ def filter(self, items=None, like=None, regex=None, axis=None):
Subset rows or columns of dataframe according to labels in
the specified index.
Note that this routine does not filter a dataframe on its
.. deprecated:: 1.0
Use ``.loc`` instead, e.g. for regular expressions
use ``.loc(regex=True)[:, "^col_"]``.
Note that this method does not filter a dataframe on its
contents. The filter is applied to the labels of the index.
Parameters
Expand Down Expand Up @@ -4693,6 +4697,13 @@ def filter(self, items=None, like=None, regex=None, axis=None):
one two three
rabbit 4 5 6
"""
warnings.warn(
"DataFrame/Series.filter is deprecated "
"and will be removed in a future version",
FutureWarning,
stacklevel=2,
)

nkw = com.count_not_none(items, like, regex)
if nkw > 1:
raise TypeError(
Expand Down
125 changes: 3 additions & 122 deletions pandas/tests/frame/test_axis_select_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,128 +807,9 @@ def test_align_series_combinations(self):
tm.assert_series_equal(res1, exp2)
tm.assert_frame_equal(res2, exp1)

def test_filter(self, float_frame, float_string_frame):
    """Exercise ``filter`` with ``items``, ``like`` and ``regex`` selectors.

    Also checks that supplying none of the three selectors, or more than
    one of them, raises ``TypeError``.
    """
    wanted = ["A", "B", "E"]

    # Items, with the default axis and with the column axis named explicitly
    for extra in ({}, {"axis": "columns"}):
        subset = float_frame.filter(wanted, **extra)
        assert len(subset.columns) == 2
        assert "E" not in subset

    # Other axis: expected to match a reindex on the same row labels
    row_labels = float_frame.index[0:4]
    tm.assert_frame_equal(
        float_frame.filter(row_labels, axis="index"),
        float_frame.reindex(index=row_labels),
    )

    # like
    widened = float_frame.copy()
    widened["AA"] = 1
    subset = widened.filter(like="A")
    assert len(subset.columns) == 2
    assert "AA" in subset

    # like with ints in column names
    mixed = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"])
    assert len(mixed.filter(like="_").columns) == 2

    # regex with ints in column names
    # from PR #10384
    mixed = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"])
    digits_only = DataFrame(
        0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)
    )
    tm.assert_frame_equal(mixed.filter(regex="^[0-9]+$"), digits_only)

    # every label is made of digits only, so nothing should be removed
    all_digits = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"])
    tm.assert_frame_equal(all_digits.filter(regex="^[0-9]+$"), all_digits)

    # pass in None: no selector supplied at all
    for kwargs in ({}, {"items": None}, {"axis": 1}):
        with pytest.raises(TypeError, match="Must pass"):
            float_frame.filter(**kwargs)

    # test mutually exclusive arguments: ``items`` plus at least one more
    conflicting = (
        {"regex": "e$", "like": "bbi"},
        {"regex": "e$", "axis": 1},
        {"regex": "e$"},
        {"like": "bbi", "axis": 0},
        {"like": "bbi"},
    )
    for kwargs in conflicting:
        with pytest.raises(TypeError, match="mutually exclusive"):
            float_frame.filter(items=["one", "three"], **kwargs)

    # objects
    assert "foo" in float_string_frame.filter(like="foo")

    # unicode columns, won't ascii-encode
    renamed = float_frame.rename(columns={"B": "\u2202"})
    assert "C" in renamed.filter(like="C")

def test_filter_regex_search(self, float_frame):
fcopy = float_frame.copy()
fcopy["AA"] = 1

# regex
filtered = fcopy.filter(regex="[A]+")
assert len(filtered.columns) == 2
assert "AA" in filtered

# doesn't have to be at beginning
df = DataFrame(
{"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]}
)

result = df.filter(regex="BB")
exp = df[[x for x in df.columns if "BB" in x]]
assert_frame_equal(result, exp)

@pytest.mark.parametrize(
    "name,expected",
    [
        # one ASCII and one non-ASCII label; the ASCII case used to be
        # listed twice with identical arguments, which only re-ran it
        ("a", DataFrame({"a": [1, 2]})),
        ("あ", DataFrame({"あ": [3, 4]})),
    ],
)
def test_filter_unicode(self, name, expected):
    """``like`` and ``regex`` must both select the column named ``name``.

    Parameters
    ----------
    name : str
        Column label used as both the ``like`` substring and the
        ``regex`` pattern.
    expected : DataFrame
        Frame holding only the column that should be selected.
    """
    # GH13101
    df = DataFrame({"a": [1, 2], "あ": [3, 4]})

    assert_frame_equal(df.filter(like=name), expected)
    assert_frame_equal(df.filter(regex=name), expected)

# The value "a" used to be listed twice, which only ran the identical case
# a second time; a single entry keeps the ``name`` parameter in place.
@pytest.mark.parametrize("name", ["a"])
def test_filter_bytestring(self, name):
    """A ``str`` selector must select the column whose label is ``b"a"``.

    Parameters
    ----------
    name : str
        Text used as both the ``like`` substring and the ``regex``
        pattern against byte-string column labels.
    """
    # GH13101
    df = DataFrame({b"a": [1, 2], b"b": [3, 4]})
    expected = DataFrame({b"a": [1, 2]})

    assert_frame_equal(df.filter(like=name), expected)
    assert_frame_equal(df.filter(regex=name), expected)

def test_filter_corner(self):
empty = DataFrame()

result = empty.filter([])
assert_frame_equal(result, empty)

result = empty.filter(like="foo")
assert_frame_equal(result, empty)
def test_filter_deprecated(self, float_frame):
    """Calling ``filter`` on a frame must emit a ``FutureWarning``."""
    labels = ["A", "B", "E"]
    with tm.assert_produces_warning(FutureWarning):
        float_frame.filter(labels)

def test_take(self, float_frame):
# homogeneous
Expand Down

0 comments on commit da9b64e

Please sign in to comment.