Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: loc with listlikes with missing elements #29802

Merged
merged 7 commits on Nov 29, 2019
16 changes: 5 additions & 11 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,18 +1174,12 @@ def _validate_read_indexer(
# non-missing values), but a bit later in the
# code, so we want to avoid warning & then
# just raising

_missing_key_warning = textwrap.dedent(
"""
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike""" # noqa: E501
)

if not (ax.is_categorical() or ax.is_interval()):
warnings.warn(_missing_key_warning, FutureWarning, stacklevel=6)
raise KeyError(
"Passing list-likes to .loc or [] with any missing labels "
"is no longer supported, see "
"https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501
)

def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False):
"""
Expand Down
12 changes: 4 additions & 8 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,16 +393,12 @@ def __init__(
if not len(Index(cols) & df.columns):
raise KeyError("passes columns are not ALL present dataframe")

# deprecated in gh-17295
# 1 missing is ok (for now)
if len(Index(cols) & df.columns) != len(cols):
warnings.warn(
"Not all names specified in 'columns' are found; "
"this will raise a KeyError in the future",
FutureWarning,
)
# Deprecated in GH#17295, enforced in 1.0.0
raise KeyError("Not all names specified in 'columns' are found")

self.df = df

self.df = df.reindex(columns=cols)
self.columns = self.df.columns
self.float_format = float_format
self.index = index
Expand Down
16 changes: 5 additions & 11 deletions pandas/tests/indexing/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from dateutil import tz
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Index, Series, Timestamp, date_range
Expand Down Expand Up @@ -242,11 +243,8 @@ def test_series_partial_set_datetime(self):
Timestamp("2011-01-02"),
Timestamp("2011-01-03"),
]
exp = Series(
[np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name="idx"), name="s"
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
with pytest.raises(KeyError, match="with any missing labels"):
ser.loc[keys]

def test_series_partial_set_period(self):
# GH 11497
Expand All @@ -273,12 +271,8 @@ def test_series_partial_set_period(self):
pd.Period("2011-01-02", freq="D"),
pd.Period("2011-01-03", freq="D"),
]
exp = Series(
[np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name="idx"), name="s"
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[keys]
tm.assert_series_equal(result, exp)
with pytest.raises(KeyError, match="with any missing labels"):
ser.loc[keys]

def test_nanosecond_getitem_setitem_with_tz(self):
# GH 11679
Expand Down
28 changes: 9 additions & 19 deletions pandas/tests/indexing/test_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,25 +726,15 @@ def test_floating_misc(self):
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, result4)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result1 = s[[1.6, 5, 10]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result2 = s.loc[[1.6, 5, 10]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result3 = s.loc[[1.6, 5, 10]]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, Series([np.nan, 2, 4], index=[1.6, 5, 10]))

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result1 = s[[0, 1, 2]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result2 = s.loc[[0, 1, 2]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result3 = s.loc[[0, 1, 2]]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, Series([0.0, np.nan, np.nan], index=[0, 1, 2]))
with pytest.raises(KeyError, match="with any missing labels"):
s[[1.6, 5, 10]]
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[1.6, 5, 10]]

with pytest.raises(KeyError, match="with any missing labels"):
s[[0, 1, 2]]
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[0, 1, 2]]

result1 = s.loc[[2.5, 5]]
result2 = s.loc[[2.5, 5]]
Expand Down
16 changes: 2 additions & 14 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,20 +728,8 @@ def test_iloc_non_unique_indexing(self):
df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000})
df2 = concat([df2, 2 * df2, 3 * df2])

sidx = df2.index.to_series()
expected = df2.iloc[idx[idx <= sidx.max()]]

new_list = []
for r, s in expected.iterrows():
new_list.append(s)
new_list.append(s * 2)
new_list.append(s * 3)

expected = DataFrame(new_list)
expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])], sort=True)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df2.loc[idx]
tm.assert_frame_equal(result, expected, check_index_type=False)
with pytest.raises(KeyError, match="with any missing labels"):
df2.loc[idx]

def test_iloc_empty_list_indexer_is_ok(self):

Expand Down
56 changes: 12 additions & 44 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,32 +299,13 @@ def test_dups_fancy_indexing(self):
tm.assert_frame_equal(result, expected)

rows = ["C", "B", "E"]
expected = DataFrame(
{
"test": [11, 9, np.nan],
"test1": [7.0, 6, np.nan],
"other": ["d", "c", np.nan],
},
index=rows,
)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[rows]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[rows]

# see GH5553, make sure we use the right indexer
rows = ["F", "G", "H", "C", "B", "E"]
expected = DataFrame(
{
"test": [np.nan, np.nan, np.nan, 11, 9, np.nan],
"test1": [np.nan, np.nan, np.nan, 7.0, 6, np.nan],
"other": [np.nan, np.nan, np.nan, "d", "c", np.nan],
},
index=rows,
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[rows]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[rows]

# List containing only missing label
dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD"))
Expand All @@ -340,38 +321,25 @@ def test_dups_fancy_indexing(self):

# GH 4619; duplicate indexer with missing label
df = DataFrame({"A": [0, 1, 2]})
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[[0, 8, 0]]
expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0])
tm.assert_frame_equal(result, expected, check_index_type=False)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[[0, 8, 0]]

df = DataFrame({"A": list("abc")})
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[[0, 8, 0]]
expected = DataFrame({"A": ["a", np.nan, "a"]}, index=[0, 8, 0])
tm.assert_frame_equal(result, expected, check_index_type=False)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[[0, 8, 0]]

# non unique with non unique selector
df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"])
expected = DataFrame(
{"test": [5, 7, 5, 7, np.nan]}, index=["A", "A", "A", "A", "E"]
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[["A", "A", "E"]]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[["A", "A", "E"]]

def test_dups_fancy_indexing2(self):
# GH 5835
# dups on index and missing values
df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"])

expected = pd.concat(
[df.loc[:, ["A", "B"]], DataFrame(np.nan, columns=["C"], index=df.index)],
axis=1,
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df.loc[:, ["A", "B", "C"]]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
df.loc[:, ["A", "B", "C"]]

# GH 6504, multi-axis indexing
df = DataFrame(
Expand Down
78 changes: 35 additions & 43 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,48 +159,46 @@ def test_loc_getitem_label_list_with_missing(self):
self.check_result(
"loc", [0, 1, 2], "indexer", [0, 1, 2], typs=["empty"], fails=KeyError,
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result(
"loc",
[0, 2, 10],
"ix",
[0, 2, 10],
typs=["ints", "uints", "floats"],
axes=0,
fails=KeyError,
)
self.check_result(
"loc",
[0, 2, 10],
"ix",
[0, 2, 10],
typs=["ints", "uints", "floats"],
axes=0,
fails=KeyError,
)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result(
"loc",
[3, 6, 7],
"ix",
[3, 6, 7],
typs=["ints", "uints", "floats"],
axes=1,
fails=KeyError,
)
self.check_result(
"loc",
[3, 6, 7],
"ix",
[3, 6, 7],
typs=["ints", "uints", "floats"],
axes=1,
fails=KeyError,
)

# GH 17758 - MultiIndex and missing keys
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result(
"loc",
[(1, 3), (1, 4), (2, 5)],
"ix",
[(1, 3), (1, 4), (2, 5)],
typs=["multi"],
axes=0,
)
self.check_result(
"loc",
[(1, 3), (1, 4), (2, 5)],
"ix",
[(1, 3), (1, 4), (2, 5)],
typs=["multi"],
axes=0,
fails=KeyError,
)

def test_getitem_label_list_with_missing(self):
s = Series(range(3), index=["a", "b", "c"])

# consistency
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
with pytest.raises(KeyError, match="with any missing labels"):
s[["a", "d"]]

s = Series(range(3))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
with pytest.raises(KeyError, match="with any missing labels"):
s[[0, 3]]

def test_loc_getitem_label_list_fails(self):
Expand Down Expand Up @@ -305,10 +303,8 @@ def test_loc_to_fail(self):
s.loc[["4"]]

s.loc[-1] = 3
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[-1, -2]]
expected = Series([3, np.nan], index=[-1, -2])
tm.assert_series_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[-1, -2]]

s["a"] = 2
msg = (
Expand Down Expand Up @@ -354,10 +350,8 @@ def test_loc_getitem_list_with_fail(self):
s.loc[[3]]

# a non-match and a match
with tm.assert_produces_warning(FutureWarning):
expected = s.loc[[2, 3]]
result = s.reindex([2, 3])
tm.assert_series_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[2, 3]]

def test_loc_getitem_label_slice(self):

Expand Down Expand Up @@ -1034,10 +1028,8 @@ def test_series_loc_getitem_label_list_missing_values():
["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64"
)
s = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4))
expected = Series([11.0, 5.0, 11.0, np.nan], index=key)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[key]
tm.assert_series_equal(result, expected)
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[key]


@pytest.mark.parametrize(
Expand Down
Loading