diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index cfdb53ec7e4b1..44358593793bc 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -1009,7 +1009,7 @@ The different indexing operation can potentially change the dtype of a ``Series` series1 = pd.Series([1, 2, 3]) series1.dtype - res = series1[[0,4]] + res = series1.reindex([0, 4]) res.dtype res diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index edbc4e6d7fd22..415f3fd702c43 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -333,8 +333,15 @@ Selection By Label dfl.loc['20130102':'20130104'] +.. warning:: + + Starting in 0.21.0, pandas will show a ``FutureWarning`` if indexing with a list with missing labels. In the future + this will raise a ``KeyError``. See :ref:`list-like Using loc with missing keys in a list is Deprecated <indexing.deprecate_loc_reindex_listlike>`. + pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol. -**At least 1** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, both the start bound **AND** the stop bound are *included*, if present in the index. Integers are valid labels, but they refer to the label **and not the position**. +All of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! +When slicing, both the start bound **AND** the stop bound are *included*, if present in the index. +Integers are valid labels, but they refer to the label **and not the position**. The ``.loc`` attribute is the primary access method. The following are valid inputs: @@ -635,6 +642,107 @@ For getting *multiple* indexers, using ``.get_indexer`` dfd.iloc[[0, 2], dfd.columns.get_indexer(['A', 'B'])] +.. _indexing.deprecate_loc_reindex_listlike: + +Indexing with list with missing labels is Deprecated +---------------------------------------------------- + +.. 
warning:: + + Starting in 0.21.0, using ``.loc`` or ``[]`` with a list with one or more missing labels, is deprecated, in favor of ``.reindex``. + +In prior versions, using ``.loc[list-of-labels]`` would work as long as *at least 1* of the keys was found (otherwise it +would raise a ``KeyError``). This behavior is deprecated and will show a warning message pointing to this section. The +recommended alternative is to use ``.reindex()``. + +For example. + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s + +Selection with all keys found is unchanged. + +.. ipython:: python + + s.loc[[1, 2]] + +Previous Behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + + +Current Behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Passing list-likes to .loc or [] with any missing label will raise + KeyError in the future, you can use .reindex() as an alternative. + + See the documentation here: + http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike + + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + + +Reindexing +~~~~~~~~~~ + +The idiomatic way to achieve selecting potentially not-found elements is via ``.reindex()``. See also the section on :ref:`reindexing <basics.reindexing>`. + +.. ipython:: python + + s.reindex([1, 2, 3]) + +Alternatively, if you want to select only *valid* keys, the following is idiomatic and efficient; it is guaranteed to preserve the dtype of the selection. + +.. ipython:: python + + labels = [1, 2, 3] + s.loc[s.index.intersection(labels)] + +Having a duplicated index will raise for a ``.reindex()``: + +.. ipython:: python + + s = pd.Series(np.arange(4), index=['a', 'a', 'b', 'c']) + labels = ['c', 'd'] + +.. code-block:: ipython + + In [17]: s.reindex(labels) + ValueError: cannot reindex from a duplicate axis + +Generally, you can intersect the desired labels with the current +axis, and then reindex. + +.. 
ipython:: python + + s.loc[s.index.intersection(labels)].reindex(labels) + +However, this would *still* raise if your resulting index is duplicated. + +.. code-block:: ipython + + In [41]: labels = ['a', 'd'] + + In [42]: s.loc[s.index.intersection(labels)].reindex(labels) + ValueError: cannot reindex from a duplicate axis + + .. _indexing.basics.partial_setting: Selecting Random Samples @@ -852,7 +960,7 @@ when you don't know which of the sought labels are in fact present: s[s.index.isin([2, 4, 6])] # compare it to the following - s[[2, 4, 6]] + s.reindex([2, 4, 6]) In addition to that, ``MultiIndex`` allows selecting a separate level to use in the membership check: diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt index 6282f15b6faeb..e44bc6e9e91e0 100644 --- a/doc/source/whatsnew/v0.15.0.txt +++ b/doc/source/whatsnew/v0.15.0.txt @@ -676,10 +676,19 @@ Other notable API changes: Both will now return a frame reindex by [1,3]. E.g. - .. ipython:: python + .. code-block:: ipython - df.loc[[1,3]] - df.loc[[1,3],:] + In [3]: df.loc[[1,3]] + Out[3]: + 0 + 1 a + 3 NaN + + In [4]: df.loc[[1,3],:] + Out[4]: + 0 + 1 a + 3 NaN This can also be seen in multi-axis indexing with a ``Panel``. @@ -693,9 +702,14 @@ Other notable API changes: The following would raise ``KeyError`` prior to 0.15.0: - .. ipython:: python + .. code-block:: ipython - p.loc[['ItemA','ItemD'],:,'D'] + In [5]: p.loc[['ItemA','ItemD'],:,'D'] + Out[5]: + ItemA ItemD + 1 3 NaN + 2 7 NaN + 3 11 NaN Furthermore, ``.loc`` will raise If no values are found in a multi-index with a list-like indexer: diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f4ec8a5f2ad24..0d4eaa90d7ab3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -300,6 +300,64 @@ If installed, we now require: | Bottleneck | 1.0.0 | | +--------------+-----------------+----------+ +.. 
_whatsnew_0210.api_breaking.loc: + +Indexing with a list with missing labels is Deprecated +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, selecting with a list of labels, where one or more labels were missing, would succeed as long as at least one label was found, returning ``NaN`` for the missing labels. +This will now show a ``FutureWarning``; in the future this will raise a ``KeyError`` (:issue:`15747`). +This warning will trigger on a ``DataFrame`` or a ``Series`` for using ``.loc[]`` or ``[[]]`` when passing a list-of-labels with at least 1 missing label. +See the :ref:`deprecation docs <indexing.deprecate_loc_reindex_listlike>`. + + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s + +Previous Behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + + +Current Behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Passing list-likes to .loc or [] with any missing label will raise + KeyError in the future, you can use .reindex() as an alternative. + + See the documentation here: + http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike + + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + +The idiomatic way to achieve selecting potentially not-found elements is via ``.reindex()``. + +.. ipython:: python + + s.reindex([1, 2, 3]) + +Selection with all keys found is unchanged. + +.. ipython:: python + + s.loc[[1, 2]] + + .. _whatsnew_0210.api_breaking.pandas_eval: Improved error handling during item assignment in pd.eval @@ -607,6 +665,7 @@ Deprecations - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) - ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. 
See the :ref:`documentation ` for more details (:issue:`17596`) - passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated, in favor of passing a :ref:`CategoricalDtype ` (:issue:`17636`) +- Passing a non-existent column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) .. _whatsnew_0210.deprecations.argmin_min: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2ea1b8a238913..e977e84702982 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1419,13 +1419,33 @@ def _has_valid_type(self, key, axis): if isinstance(key, tuple) and isinstance(ax, MultiIndex): return True - # TODO: don't check the entire key unless necessary - if (not is_iterator(key) and len(key) and - np.all(ax.get_indexer_for(key) < 0)): + if not is_iterator(key) and len(key): - raise KeyError(u"None of [{key}] are in the [{axis}]" - .format(key=key, - axis=self.obj._get_axis_name(axis))) + # True indicates missing values + missing = ax.get_indexer_for(key) < 0 + + if np.any(missing): + if len(key) == 1 or np.all(missing): + raise KeyError( + u"None of [{key}] are in the [{axis}]".format( + key=key, axis=self.obj._get_axis_name(axis))) + else: + + # we skip the warning on Categorical/Interval + # as this check is actually done (check for + # non-missing values), but a bit later in the + # code, so we want to avoid warning & then + # just raising + _missing_key_warning = textwrap.dedent(""" + Passing list-likes to .loc or [] with any missing label will raise + KeyError in the future, you can use .reindex() as an alternative. 
+ + See the documentation here: + http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike""") # noqa + + if not (ax.is_categorical() or ax.is_interval()): + warnings.warn(_missing_key_warning, + FutureWarning, stacklevel=5) return True diff --git a/pandas/core/series.py b/pandas/core/series.py index 97f39a680c8c9..58cac46f63d7e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -691,7 +691,7 @@ def _get_with(self, key): if key_type == 'integer': if self.index.is_integer() or self.index.is_floating(): - return self.reindex(key) + return self.loc[key] else: return self._get_values(key) elif key_type == 'boolean': diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 51668bb6b0895..9e888c38edaa7 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -356,7 +356,21 @@ def __init__(self, df, na_rep='', float_format=None, cols=None, self.styler = None self.df = df if cols is not None: - self.df = df.loc[:, cols] + + # all missing, raise + if not len(Index(cols) & df.columns): + raise KeyError( + "passes columns are not ALL present dataframe") + + # deprecatedin gh-17295 + # 1 missing is ok (for now) + if len(Index(cols) & df.columns) != len(cols): + warnings.warn( + "Not all names specified in 'columns' are found; " + "this will raise a KeyError in the future", + FutureWarning) + + self.df = df.reindex(columns=cols) self.columns = self.df.columns self.float_format = float_format self.index = index diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 6874fedaa705f..2f01eced364a3 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -111,7 +111,8 @@ def test_loc_listlike(self): assert_frame_equal(result, expected, check_index_type=True) # not all labels in the categories - pytest.raises(KeyError, lambda: self.df2.loc[['a', 'd']]) + with pytest.raises(KeyError): + self.df2.loc[['a', 
'd']] def test_loc_listlike_dtypes(self): # GH 11586 diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index ddac80fbc4693..617757c888eb5 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -223,7 +223,9 @@ def test_series_partial_set_datetime(self): Timestamp('2011-01-03')] exp = Series([np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name='idx'), name='s') - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) def test_series_partial_set_period(self): # GH 11497 @@ -248,5 +250,7 @@ def test_series_partial_set_period(self): pd.Period('2011-01-03', freq='D')] exp = Series([np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name='idx'), name='s') - result = ser.loc[keys] + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = ser.loc[keys] tm.assert_series_equal(result, exp) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 39569f0b0cb38..c8e320f9d9c77 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -617,7 +617,8 @@ def test_iloc_non_unique_indexing(self): expected = DataFrame(new_list) expected = pd.concat([expected, DataFrame(index=idx[idx > sidx.max()]) ]) - result = df2.loc[idx] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df2.loc[idx] tm.assert_frame_equal(result, expected, check_index_type=False) def test_iloc_empty_list_indexer_is_ok(self): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index f1f51f26df55c..d64ed98243d72 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -176,7 +176,8 @@ def test_dups_fancy_indexing(self): 'test1': [7., 6, np.nan], 'other': ['d', 'c', 
np.nan]}, index=rows) - result = df.loc[rows] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[rows] tm.assert_frame_equal(result, expected) # see GH5553, make sure we use the right indexer @@ -186,7 +187,8 @@ def test_dups_fancy_indexing(self): 'other': [np.nan, np.nan, np.nan, 'd', 'c', np.nan]}, index=rows) - result = df.loc[rows] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[rows] tm.assert_frame_equal(result, expected) # inconsistent returns for unique/duplicate indices when values are @@ -203,12 +205,14 @@ def test_dups_fancy_indexing(self): # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) - result = df.loc[[0, 8, 0]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[[0, 8, 0]] expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) df = DataFrame({"A": list('abc')}) - result = df.loc[[0, 8, 0]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[[0, 8, 0]] expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) @@ -216,7 +220,8 @@ def test_dups_fancy_indexing(self): df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C']) expected = DataFrame( {'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E']) - result = df.loc[['A', 'A', 'E']] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[['A', 'A', 'E']] tm.assert_frame_equal(result, expected) # GH 5835 @@ -227,7 +232,8 @@ def test_dups_fancy_indexing(self): expected = pd.concat( [df.loc[:, ['A', 'B']], DataFrame(np.nan, columns=['C'], index=df.index)], axis=1) - result = df.loc[:, ['A', 'B', 'C']] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[:, ['A', 'B', 'C']] 
tm.assert_frame_equal(result, expected) # GH 6504, multi-axis indexing diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 95d6a24e68425..c6f38aeba9e87 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -152,15 +152,29 @@ def test_loc_getitem_label_list(self): [Timestamp('20130102'), Timestamp('20130103')], typs=['ts'], axes=0) + def test_loc_getitem_label_list_with_missing(self): self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2], typs=['empty'], fails=KeyError) - self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], - typs=['ints', 'uints'], axes=0, fails=KeyError) - self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], - typs=['ints', 'uints'], axes=1, fails=KeyError) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], + typs=['ints', 'uints'], axes=0, fails=KeyError) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], + typs=['ints', 'uints'], axes=1, fails=KeyError) self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10], typs=['ints', 'uints'], axes=2, fails=KeyError) + def test_getitem_label_list_with_missing(self): + s = pd.Series(range(3), index=['a', 'b', 'c']) + + # consistency + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + s[['a', 'd']] + + s = pd.Series(range(3)) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + s[[0, 3]] + def test_loc_getitem_label_list_fails(self): # fails self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], @@ -249,7 +263,9 @@ def test_loc_to_fail(self): pytest.raises(KeyError, lambda: s.loc[['4']]) s.loc[-1] = 3 - result = s.loc[[-1, -2]] + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s.loc[[-1, -2]] expected = Series([3, 
np.nan], index=[-1, -2]) tm.assert_series_equal(result, expected) @@ -277,6 +293,23 @@ def f(): pytest.raises(KeyError, f) + def test_loc_getitem_list_with_fail(self): + # 15747 + # should KeyError if *any* missing labels + + s = Series([1, 2, 3]) + + s.loc[[2]] + + with pytest.raises(KeyError): + s.loc[[3]] + + # a non-match and a match + with tm.assert_produces_warning(FutureWarning): + expected = s.loc[[2, 3]] + result = s.reindex([2, 3]) + tm.assert_series_equal(result, expected) + def test_loc_getitem_label_slice(self): # label slices (with ints) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 93a85e247a787..41ddfe934a131 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -222,13 +222,21 @@ def test_series_partial_set(self): # Regression from GH4825 ser = Series([0.1, 0.2], index=[1, 2]) - # loc + # loc equiv to .reindex expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) - result = ser.loc[[3, 2, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = ser.reindex([3, 2, 3]) tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 'x']) - result = ser.loc[[3, 2, 3, 'x']] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3, 'x']] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = ser.reindex([3, 2, 3, 'x']) tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) @@ -236,38 +244,71 @@ def test_series_partial_set(self): tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, 'x', 1]) - result = ser.loc[[2, 2, 'x', 1]] + with tm.assert_produces_warning(FutureWarning, 
check_stacklevel=False): + result = ser.loc[[2, 2, 'x', 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = ser.reindex([2, 2, 'x', 1]) tm.assert_series_equal(result, expected, check_index_type=True) # raises as nothing in in the index pytest.raises(KeyError, lambda: ser.loc[[3, 3, 3]]) expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) - result = ser.loc[[2, 2, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[2, 2, 3]] tm.assert_series_equal(result, expected, check_index_type=True) + result = ser.reindex([2, 2, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3], index=[1, 2, 3]) expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) - result = Series([0.1, 0.2, 0.3], index=[1, 2, 3]).loc[[3, 4, 4]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[3, 4, 4]] tm.assert_series_equal(result, expected, check_index_type=True) + result = s.reindex([3, 4, 4]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]) expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[5, 3, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[5, 3, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.reindex([5, 3, 3]) tm.assert_series_equal(result, expected, check_index_type=True) + s = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]) expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[5, 4, 4]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[5, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.reindex([5, 4, 4]) tm.assert_series_equal(result, expected, 
check_index_type=True) + s = Series([0.1, 0.2, 0.3, 0.4], + index=[4, 5, 6, 7]) expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[4, 5, 6, 7]).loc[[7, 2, 2]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[7, 2, 2]] tm.assert_series_equal(result, expected, check_index_type=True) + result = s.reindex([7, 2, 2]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]) expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[4, 5, 5]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[4, 5, 5]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.reindex([4, 5, 5]) tm.assert_series_equal(result, expected, check_index_type=True) # iloc @@ -284,13 +325,15 @@ def test_series_partial_set_with_name(self): # loc exp_idx = Index([3, 2, 3], dtype='int64', name='idx') expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name='s') - result = ser.loc[[3, 2, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx') expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, name='s') - result = ser.loc[[3, 2, 3, 'x']] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3, 'x']] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([2, 2, 1], dtype='int64', name='idx') @@ -300,7 +343,8 @@ def test_series_partial_set_with_name(self): exp_idx = Index([2, 2, 'x', 1], dtype='object', name='idx') expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name='s') - result = ser.loc[[2, 2, 'x', 1]] + with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[2, 2, 'x', 1]] tm.assert_series_equal(result, expected, check_index_type=True) # raises as nothing in in the index @@ -308,41 +352,49 @@ def test_series_partial_set_with_name(self): exp_idx = Index([2, 2, 3], dtype='int64', name='idx') expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s') - result = ser.loc[[2, 2, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[2, 2, 3]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([3, 4, 4], dtype='int64', name='idx') expected = Series([0.3, np.nan, np.nan], index=exp_idx, name='s') idx = Index([1, 2, 3], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3], index=idx, name='s').loc[[3, 4, 4]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3], + index=idx, + name='s').loc[[3, 4, 4]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([5, 3, 3], dtype='int64', name='idx') expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s') idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[5, 3, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 3, 3]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([5, 4, 4], dtype='int64', name='idx') expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s') idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[5, 4, 4]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 4, 4]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([7, 2, 2], 
dtype='int64', name='idx') expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') idx = Index([4, 5, 6, 7], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[7, 2, 2]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[7, 2, 2]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([4, 5, 5], dtype='int64', name='idx') expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[4, 5, 5]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[4, 5, 5]] tm.assert_series_equal(result, expected, check_index_type=True) # iloc diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 6a399f41975e5..4e25fe0371718 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1808,8 +1808,10 @@ def test_invalid_columns(self): write_frame = DataFrame({'A': [1, 1, 1], 'B': [2, 2, 2]}) - write_frame.to_excel(path, 'test1', columns=['B', 'C']) - expected = write_frame.loc[:, ['B', 'C']] + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + write_frame.to_excel(path, 'test1', columns=['B', 'C']) + expected = write_frame.reindex(columns=['B', 'C']) read_frame = read_excel(path, 'test1') tm.assert_frame_equal(expected, read_frame) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 6e646f9b29442..65d58a196d1eb 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1222,7 +1222,7 @@ def test_handle_empty_objects(self): frames = [baz, empty, empty, df[5:]] concatted = concat(frames, axis=0) - expected = df.loc[:, ['a', 'b', 'c', 'd', 'foo']] + expected = 
df.reindex(columns=['a', 'b', 'c', 'd', 'foo']) expected['foo'] = expected['foo'].astype('O') expected.loc[0:4, 'foo'] = 'bar' diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 272e8c7de5e49..86211612a5955 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -590,8 +590,13 @@ def test_getitem_dups_with_missing(self): # breaks reindex, so need to use .loc internally # GH 4246 s = Series([1, 2, 3, 4], ['foo', 'bar', 'foo', 'bah']) - expected = s.loc[['foo', 'bar', 'bah', 'bam']] - result = s[['foo', 'bar', 'bah', 'bam']] + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + expected = s.loc[['foo', 'bar', 'bah', 'bam']] + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s[['foo', 'bar', 'bah', 'bam']] assert_series_equal(result, expected) def test_getitem_dups(self): diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index 382cff4b9d0ac..edbac8f09241b 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -121,8 +121,8 @@ def test_loc(self): tm.assert_sp_series_equal(result, exp) # exceeds the bounds - result = sparse.loc[[1, 3, 4, 5]] - exp = orig.loc[[1, 3, 4, 5]].to_sparse() + result = sparse.reindex([1, 3, 4, 5]) + exp = orig.reindex([1, 3, 4, 5]).to_sparse() tm.assert_sp_series_equal(result, exp) # padded with NaN assert np.isnan(result[-1]) @@ -677,8 +677,8 @@ def test_loc(self): tm.assert_sp_frame_equal(result, exp) # exceeds the bounds - result = sparse.loc[[1, 3, 4, 5]] - exp = orig.loc[[1, 3, 4, 5]].to_sparse() + result = sparse.reindex([1, 3, 4, 5]) + exp = orig.reindex([1, 3, 4, 5]).to_sparse() tm.assert_sp_frame_equal(result, exp) # dense array