Skip to content

Commit

Permalink
BUG: Don't error with empty Series for .isin (pandas-dev#17006)
Browse files Browse the repository at this point in the history
Empty Series initializes to float64, even when the data type is object for .isin,
leading to an error with membership.

Closes pandas-dev#16991.
  • Loading branch information
gfyoung authored and alanbato committed Nov 10, 2017
1 parent 452225c commit 59c7dde
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,4 @@ Other
^^^^^
- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)
- The ``Series`` constructor with no arguments would have an index like ``Index([], dtype='object')`` instead of ``RangeIndex(start=0, stop=0, step=1)``
- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`)
2 changes: 2 additions & 0 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ def _ensure_data(values, dtype=None):

# we check some simple dtypes first
try:
if is_object_dtype(dtype):
return _ensure_object(np.asarray(values)), 'object', 'object'
if is_bool_dtype(values) or is_bool_dtype(dtype):
# we are actually coercing to uint64
# until our algos support uint8 directly (see TODO)
Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,10 +1151,13 @@ def test_isin(self):
expected = DataFrame([df.loc[s].isin(other) for s in df.index])
tm.assert_frame_equal(result, expected)

def test_isin_empty(self):
@pytest.mark.parametrize("empty", [[], Series(), np.array([])])
def test_isin_empty(self, empty):
# see gh-16991
df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']})
result = df.isin([])
expected = pd.DataFrame(False, df.index, df.columns)
expected = DataFrame(False, df.index, df.columns)

result = df.isin(empty)
tm.assert_frame_equal(result, expected)

def test_isin_dict(self):
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1407,6 +1407,15 @@ def check_idx(idx):
# Float64Index overrides isin, so must be checked separately
check_idx(Float64Index([1.0, 2.0, 3.0, 4.0]))

@pytest.mark.parametrize("empty", [[], Series(), np.array([])])
def test_isin_empty(self, empty):
# see gh-16991
idx = Index(["a", "b"])
expected = np.array([False, False])

result = idx.isin(empty)
tm.assert_numpy_array_equal(expected, result)

def test_boolean_cmp(self):
values = [1, 2, 3, 4]

Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1147,6 +1147,15 @@ def test_isin_with_i8(self):
result = s.isin(s[0:2])
assert_series_equal(result, expected)

@pytest.mark.parametrize("empty", [[], Series(), np.array([])])
def test_isin_empty(self, empty):
# see gh-16991
s = Series(["a", "b"])
expected = Series([False, False])

result = s.isin(empty)
tm.assert_series_equal(expected, result)

def test_timedelta64_analytics(self):
from pandas import date_range

Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,15 @@ def test_categorical_from_codes(self):
result = algos.isin(Sd, St)
tm.assert_numpy_array_equal(expected, result)

@pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])])
def test_empty(self, empty):
# see gh-16991
vals = pd.Index(["a", "b"])
expected = np.array([False, False])

result = algos.isin(vals, empty)
tm.assert_numpy_array_equal(expected, result)


class TestValueCounts(object):

Expand Down

0 comments on commit 59c7dde

Please sign in to comment.