From 19c1de1e48a350859ad6b98b3249b7f05449741b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 1 Oct 2017 19:23:11 -0400 Subject: [PATCH] doc changes deprecate passing non-existing column in .to_excel(..., columns=) --- doc/source/advanced.rst | 2 +- doc/source/indexing.rst | 17 +++++++++-------- doc/source/whatsnew/v0.21.0.txt | 10 ++++++---- pandas/io/formats/excel.py | 15 +++++++++++---- pandas/tests/io/test_excel.py | 4 +++- 5 files changed, 30 insertions(+), 18 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index cfdb53ec7e4b1a..44358593793bca 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -1009,7 +1009,7 @@ The different indexing operation can potentially change the dtype of a ``Series` series1 = pd.Series([1, 2, 3]) series1.dtype - res = series1[[0,4]] + res = series1.reindex([0, 4]) res.dtype res diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index aa7de89e8bbd2c..415f3fd702c43f 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -335,7 +335,7 @@ Selection By Label .. warning:: - Starting in 0.21.0, pandas will show a ``FutureWarning`` if indexing with a list-of-lables and not ALL labels are present. In the future + Starting in 0.21.0, pandas will show a ``FutureWarning`` if indexing with a list with missing labels. In the future this will raise a ``KeyError``. See :ref:`list-like Using loc with missing keys in a list is Deprecated ` pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol. @@ -644,12 +644,12 @@ For getting *multiple* indexers, using ``.get_indexer`` .. _indexing.deprecate_loc_reindex_listlike: -Indexing with missing list-of-labels is Deprecated --------------------------------------------------- +Indexing with list with missing labels is Deprecated +---------------------------------------------------- .. 
warning:: - Starting in 0.21.0, using ``.loc`` or ``[]`` with a list-like containing one or more missing labels, is deprecated, in favor of ``.reindex``. + Starting in 0.21.0, using ``.loc`` or ``[]`` with a list with one or more missing labels, is deprecated, in favor of ``.reindex``. In prior versions, using ``.loc[list-of-labels]`` would work as long as *at least 1* of the keys was found (otherwise it would raise a ``KeyError``). This behavior is deprecated and will show a warning message pointing to this section. The @@ -672,7 +672,6 @@ Previous Behavior .. code-block:: ipython - In [4]: s.loc[[1, 2, 3]] Out[4]: 1 2.0 @@ -683,6 +682,8 @@ Previous Behavior Current Behavior +.. code-block:: ipython + In [4]: s.loc[[1, 2, 3]] Passing list-likes to .loc with any non-matching elements will raise KeyError in the future, you can use .reindex() as an alternative. @@ -720,7 +721,7 @@ Having a duplicated index will raise for a ``.reindex()``: s = pd.Series(np.arange(4), index=['a', 'a', 'b', 'c']) labels = ['c', 'd'] -.. code-block:: python +.. code-block:: ipython In [17]: s.reindex(labels) ValueError: cannot reindex from a duplicate axis @@ -734,7 +735,7 @@ axis, and then reindex. However, this would *still* raise if your resulting index is duplicated. -.. code-block:: python +.. code-block:: ipython In [41]: labels = ['a', 'd'] @@ -959,7 +960,7 @@ when you don't know which of the sought labels are in fact present: s[s.index.isin([2, 4, 6])] # compare it to the following - s[[2, 4, 6]] + s.reindex([2, 4, 6]) In addition to that, ``MultiIndex`` allows selecting a separate level to use in the membership check: diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index e3b1db61b7f947..ced83a57ccbfc2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -270,10 +270,10 @@ We have updated our minimum supported versions of dependencies (:issue:`15206`, .. 
_whatsnew_0210.api_breaking.loc: -Indexing with missing list-of-labels is Deprecated -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Indexing with a list with missing labels is Deprecated +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously, selecting at least 1 valid label with a list-like indexer would always succeed, returning ``NaN`` for missing labels. +Previously, selecting with a list of labels, where one or more labels were missing, would always succeed, returning ``NaN`` for missing labels. This will now show a ``FutureWarning``, in the future this will raise a ``KeyError`` (:issue:`15747`). This warning will trigger on a ``DataFrame`` or a ``Series`` for using ``.loc[]`` or ``[[]]`` when passing a list-of-labels with at least 1 missing label. See the :ref:`deprecation docs `. @@ -288,7 +288,6 @@ Previous Behavior .. code-block:: ipython - In [4]: s.loc[[1, 2, 3]] Out[4]: 1 2.0 @@ -299,6 +298,8 @@ Previous Behavior Current Behavior +.. code-block:: ipython + In [4]: s.loc[[1, 2, 3]] Passing list-likes to .loc or [] with any missing label will raise KeyError in the future, you can use .reindex() as an alternative. @@ -628,6 +629,7 @@ Deprecations - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) +- Passing a non-existent column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) .. 
_whatsnew_0210.deprecations.argmin_min diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index cf15f4e0fb5169..f76701f721518b 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -359,11 +359,18 @@ def __init__(self, df, na_rep='', float_format=None, cols=None, # all missing, raise if not len(Index(cols) & df.columns): - raise KeyError + raise KeyError( + "passes columns are not ALL present dataframe") + + # deprecatedin gh-17295 + # 1 missing is ok (for now) + if len(Index(cols) & df.columns) != len(cols): + warnings.warn( + "columns must be a subset of the " + "dataframe columns; this will raise " + "a KeyError in the future", + FutureWarning) - # 1 missing is ok - # TODO(jreback) this should raise - # on *any* missing columns self.df = df.reindex(columns=cols) self.columns = self.df.columns self.float_format = float_format diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 5faed0ffa4f8e4..4e25fe03717182 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1808,7 +1808,9 @@ def test_invalid_columns(self): write_frame = DataFrame({'A': [1, 1, 1], 'B': [2, 2, 2]}) - write_frame.to_excel(path, 'test1', columns=['B', 'C']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + write_frame.to_excel(path, 'test1', columns=['B', 'C']) expected = write_frame.reindex(columns=['B', 'C']) read_frame = read_excel(path, 'test1') tm.assert_frame_equal(expected, read_frame)