Skip to content

Commit

Permalink
DEPR: loc with listlikes with missing elements (pandas-dev#29802)
Browse files: browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and proost committed Dec 19, 2019
1 parent de8f942 commit 3238c3d
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 241 deletions.
16 changes: 5 additions & 11 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1176,18 +1176,12 @@ def _validate_read_indexer(
# non-missing values), but a bit later in the
# code, so we want to avoid warning & then
# just raising

_missing_key_warning = textwrap.dedent(
"""
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.
See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike""" # noqa: E501
)

if not (ax.is_categorical() or ax.is_interval()):
warnings.warn(_missing_key_warning, FutureWarning, stacklevel=6)
raise KeyError(
"Passing list-likes to .loc or [] with any missing labels "
"is no longer supported, see "
"https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501
)

def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False):
"""
Expand Down
12 changes: 4 additions & 8 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,16 +393,12 @@ def __init__(
if not len(Index(cols) & df.columns):
raise KeyError("passes columns are not ALL present dataframe")

# deprecated in gh-17295
# 1 missing is ok (for now)
if len(Index(cols) & df.columns) != len(cols):
warnings.warn(
"Not all names specified in 'columns' are found; "
"this will raise a KeyError in the future",
FutureWarning,
)
# Deprecated in GH#17295, enforced in 1.0.0
raise KeyError("Not all names specified in 'columns' are found")

self.df = df

self.df = df.reindex(columns=cols)
self.columns = self.df.columns
self.float_format = float_format
self.index = index
Expand Down
16 changes: 5 additions & 11 deletions pandas/tests/indexing/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from dateutil import tz
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Index, Series, Timestamp, date_range
Expand Down Expand Up @@ -242,11 +243,8 @@ def test_series_partial_set_datetime(self):
Timestamp("2011-01-02"),
Timestamp("2011-01-03"),
]
exp = Series(
[np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name="idx"), name="s"
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
with pytest.raises(KeyError, match="with any missing labels"):
ser.loc[keys]

def test_series_partial_set_period(self):
# GH 11497
Expand All @@ -273,12 +271,8 @@ def test_series_partial_set_period(self):
pd.Period("2011-01-02", freq="D"),
pd.Period("2011-01-03", freq="D"),
]
exp = Series(
[np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name="idx"), name="s"
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[keys]
tm.assert_series_equal(result, exp)
with pytest.raises(KeyError, match="with any missing labels"):
ser.loc[keys]

def test_nanosecond_getitem_setitem_with_tz(self):
# GH 11679
Expand Down
28 changes: 9 additions & 19 deletions pandas/tests/indexing/test_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,25 +726,15 @@ def test_floating_misc(self):
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, result4)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result1 = s[[1.6, 5, 10]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result2 = s.loc[[1.6, 5, 10]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result3 = s.loc[[1.6, 5, 10]]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, Series([np.nan, 2, 4], index=[1.6, 5, 10]))

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result1 = s[[0, 1, 2]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result2 = s.loc[[0, 1, 2]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result3 = s.loc[[0, 1, 2]]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, Series([0.0, np.nan, np.nan], index=[0, 1, 2]))
with pytest.raises(KeyError, match="with any missing labels"):
s[[1.6, 5, 10]]
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[1.6, 5, 10]]

with pytest.raises(KeyError, match="with any missing labels"):
s[[0, 1, 2]]
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[0, 1, 2]]

result1 = s.loc[[2.5, 5]]
result2 = s.loc[[2.5, 5]]
Expand Down
16 changes: 2 additions & 14 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,20 +728,8 @@ def test_iloc_non_unique_indexing(self):
df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000})
df2 = concat([df2, 2 * df2, 3 * df2])

sidx = df2.index.to_series()
expected = df2.iloc[idx[idx <= sidx.max()]]

new_list = []
for r, s in expected.iterrows():
new_list.append(s)
new_list.append(s * 2)
new_list.append(s * 3)

expected = DataFrame(new_list)
expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])], sort=True)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df2.loc[idx]
tm.assert_frame_equal(result, expected, check_index_type=False)
with pytest.raises(KeyError, match="with any missing labels"):
df2.loc[idx]

def test_iloc_empty_list_indexer_is_ok(self):

Expand Down
56 changes: 12 additions & 44 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,32 +299,13 @@ def test_dups_fancy_indexing(self):
tm.assert_frame_equal(result, expected)

rows = ["C", "B", "E"]
expected = DataFrame(
{
"test": [11, 9, np.nan],
"test1": [7.0, 6, np.nan],
"other": ["d", "c", np.nan],
},
index=rows,
)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[rows]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[rows]

# see GH5553, make sure we use the right indexer
rows = ["F", "G", "H", "C", "B", "E"]
expected = DataFrame(
{
"test": [np.nan, np.nan, np.nan, 11, 9, np.nan],
"test1": [np.nan, np.nan, np.nan, 7.0, 6, np.nan],
"other": [np.nan, np.nan, np.nan, "d", "c", np.nan],
},
index=rows,
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[rows]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[rows]

# List containing only missing label
dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD"))
Expand All @@ -340,38 +321,25 @@ def test_dups_fancy_indexing(self):

# GH 4619; duplicate indexer with missing label
df = DataFrame({"A": [0, 1, 2]})
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[[0, 8, 0]]
expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0])
tm.assert_frame_equal(result, expected, check_index_type=False)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[[0, 8, 0]]

df = DataFrame({"A": list("abc")})
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[[0, 8, 0]]
expected = DataFrame({"A": ["a", np.nan, "a"]}, index=[0, 8, 0])
tm.assert_frame_equal(result, expected, check_index_type=False)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[[0, 8, 0]]

# non unique with non unique selector
df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"])
expected = DataFrame(
{"test": [5, 7, 5, 7, np.nan]}, index=["A", "A", "A", "A", "E"]
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[["A", "A", "E"]]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[["A", "A", "E"]]

def test_dups_fancy_indexing2(self):
# GH 5835
# dups on index and missing values
df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"])

expected = pd.concat(
[df.loc[:, ["A", "B"]], DataFrame(np.nan, columns=["C"], index=df.index)],
axis=1,
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[:, ["A", "B", "C"]]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[:, ["A", "B", "C"]]

# GH 6504, multi-axis indexing
df = DataFrame(
Expand Down
78 changes: 35 additions & 43 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,48 +159,46 @@ def test_loc_getitem_label_list_with_missing(self):
self.check_result(
"loc", [0, 1, 2], "indexer", [0, 1, 2], typs=["empty"], fails=KeyError,
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result(
"loc",
[0, 2, 10],
"ix",
[0, 2, 10],
typs=["ints", "uints", "floats"],
axes=0,
fails=KeyError,
)
self.check_result(
"loc",
[0, 2, 10],
"ix",
[0, 2, 10],
typs=["ints", "uints", "floats"],
axes=0,
fails=KeyError,
)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result(
"loc",
[3, 6, 7],
"ix",
[3, 6, 7],
typs=["ints", "uints", "floats"],
axes=1,
fails=KeyError,
)
self.check_result(
"loc",
[3, 6, 7],
"ix",
[3, 6, 7],
typs=["ints", "uints", "floats"],
axes=1,
fails=KeyError,
)

# GH 17758 - MultiIndex and missing keys
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result(
"loc",
[(1, 3), (1, 4), (2, 5)],
"ix",
[(1, 3), (1, 4), (2, 5)],
typs=["multi"],
axes=0,
)
self.check_result(
"loc",
[(1, 3), (1, 4), (2, 5)],
"ix",
[(1, 3), (1, 4), (2, 5)],
typs=["multi"],
axes=0,
fails=KeyError,
)

def test_getitem_label_list_with_missing(self):
s = Series(range(3), index=["a", "b", "c"])

# consistency
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
with pytest.raises(KeyError, match="with any missing labels"):
s[["a", "d"]]

s = Series(range(3))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
with pytest.raises(KeyError, match="with any missing labels"):
s[[0, 3]]

def test_loc_getitem_label_list_fails(self):
Expand Down Expand Up @@ -305,10 +303,8 @@ def test_loc_to_fail(self):
s.loc[["4"]]

s.loc[-1] = 3
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[-1, -2]]
expected = Series([3, np.nan], index=[-1, -2])
tm.assert_series_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[-1, -2]]

s["a"] = 2
msg = (
Expand Down Expand Up @@ -354,10 +350,8 @@ def test_loc_getitem_list_with_fail(self):
s.loc[[3]]

# a non-match and a match
with tm.assert_produces_warning(FutureWarning):
expected = s.loc[[2, 3]]
result = s.reindex([2, 3])
tm.assert_series_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[2, 3]]

def test_loc_getitem_label_slice(self):

Expand Down Expand Up @@ -1034,10 +1028,8 @@ def test_series_loc_getitem_label_list_missing_values():
["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64"
)
s = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4))
expected = Series([11.0, 5.0, 11.0, np.nan], index=key)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[key]
tm.assert_series_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[key]


@pytest.mark.parametrize(
Expand Down
Loading

0 comments on commit 3238c3d

Please sign in to comment.