From 0eeff33d7eadbab98bcd5474c2454f118d71a472 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 9 Aug 2018 20:05:13 +0200 Subject: [PATCH] DEPR: list-likes of list-likes in str.cat --- doc/source/text.rst | 6 +++--- doc/source/whatsnew/v0.24.0.txt | 7 +++++-- pandas/core/strings.py | 26 +++++++++++++++++++++----- pandas/tests/test_strings.py | 25 +++++++++++++++++-------- 4 files changed, 46 insertions(+), 18 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index 0081b592f91bfc..41542fbc563e57 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -306,20 +306,20 @@ The same alignment can be used when ``others`` is a ``DataFrame``: Concatenating a Series and many objects into a Series ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -All one-dimensional list-likes can be arbitrarily combined in a list-like container (including iterators, ``dict``-views, etc.): +Several objects of type ``Series``, ``Index`` or ``np.ndarray`` can be arbitrarily combined in a list-like container (including iterators, ``dict``-views, etc.): .. ipython:: python s u - s.str.cat([u.values, ['A', 'B', 'C', 'D'], map(str, u.index)], na_rep='-') + s.str.cat([u.values, u, u.index.astype(str)], na_rep='-') All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None: .. 
ipython:: python v - s.str.cat([u, v, ['A', 'B', 'C', 'D']], join='outer', na_rep='-') + s.str.cat([u, v, u.values], join='outer', na_rep='-') If using ``join='right'`` on a list of ``others`` that contains different indexes, the union of these indexes will be used as the basis for the final concatenation: diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f26d3d76592d00..2baba3dd8c82e4 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -474,12 +474,15 @@ Other API Changes Deprecations ~~~~~~~~~~~~ -- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`). -- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) +- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. + The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`) +- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) - :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`) - :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`) - :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`) - :func:`pandas.read_table` is deprecated. Instead, use :func:`pandas.read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`) +- :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain + arbitrarily many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`) .. 
_whatsnew_0240.prior_deprecations: diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 07e744a6284efa..b11869a93a25f4 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1986,11 +1986,20 @@ def _get_series_list(self, others, ignore_index=False): # either one-dimensional list-likes or scalars if all(is_list_like(x) for x in others): los = [] - warn = False + join_warn = False + depr_warn = False # iterate through list and append list of series for each # element (which we check to be one-dimensional and non-nested) while others: nxt = others.pop(0) # nxt is guaranteed list-like by above + + # GH 21950 - DeprecationWarning + # only allowing Series/Index/np.ndarray[1-dim] will greatly + # simplify this function post-deprecation. + if not (isinstance(nxt, (Series, Index)) or + (isinstance(nxt, np.ndarray) and nxt.ndim == 1)): + depr_warn = True + if not isinstance(nxt, (DataFrame, Series, Index, np.ndarray)): # safety for non-persistent list-likes (e.g. iterators) @@ -2013,8 +2022,15 @@ def _get_series_list(self, others, ignore_index=False): nxt, wnx = self._get_series_list(nxt, ignore_index=ignore_index) los = los + nxt - warn = warn or wnx - return (los, warn) + join_warn = join_warn or wnx + + if depr_warn: + warnings.warn('list-likes other than Series, Index or ' + 'np.ndarray WITHIN another list-like ' + 'are deprecated and will be removed in ' + 'a future version.', + FutureWarning, stacklevel=3) + return (los, join_warn) elif all(not is_list_like(x) for x in others): return ([Series(others, index=idx)], False) raise TypeError(err_msg) @@ -2037,8 +2053,8 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): Series/Index/DataFrame) if `join` is not None. If others is a list-like that contains a combination of Series, - np.ndarray (1-dim) or list-like, then all elements will be unpacked - and must satisfy the above criteria individually. 
+ Index or np.ndarray (1-dim), then all elements will be unpacked and + must satisfy the above criteria individually. If others is None, the method returns the concatenation of all strings in the calling Series/Index. diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 9d008dfd25c902..ab508174fa4a93 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -313,7 +313,9 @@ def test_str_cat_mixed_inputs(self, series_or_index): assert_series_or_index_equal(s.str.cat([tt, s]), exp) # Series/Index with list of list-likes - assert_series_or_index_equal(s.str.cat([t.values, list(s)]), exp) + with tm.assert_produces_warning(expected_warning=FutureWarning): + # nested lists will be deprecated + assert_series_or_index_equal(s.str.cat([t.values, list(s)]), exp) # Series/Index with mixed list of Series/list-like # s as Series has same index as t -> no warning @@ -327,7 +329,10 @@ def test_str_cat_mixed_inputs(self, series_or_index): assert_series_or_index_equal(s.str.cat([tt, s.values]), exp) # Series/Index with iterator of list-likes - assert_series_or_index_equal(s.str.cat(iter([t.values, list(s)])), exp) + with tm.assert_produces_warning(expected_warning=FutureWarning): + # nested list-likes will be deprecated + assert_series_or_index_equal(s.str.cat(iter([t.values, list(s)])), + exp) # errors for incorrect lengths rgx = 'All arrays must be same length, except.*' @@ -348,11 +353,11 @@ def test_str_cat_mixed_inputs(self, series_or_index): # list of list-likes with tm.assert_raises_regex(ValueError, rgx): - s.str.cat([z.values, list(s)]) + s.str.cat([z.values, s.values]) # mixed list of Series/list-like with tm.assert_raises_regex(ValueError, rgx): - s.str.cat([z, list(s)]) + s.str.cat([z, s.values]) # errors for incorrect arguments in list-like rgx = 'others must be Series, Index, DataFrame,.*' @@ -423,11 +428,15 @@ def test_str_cat_align_mixed_inputs(self, join): e = concat([t, s], axis=1, join=(join if join == 'inner' else 
'outer')) sa, ea = s.align(e, join=join) exp = exp_outer.loc[ea.index] - tm.assert_series_equal(s.str.cat([t, u], join=join, na_rep='-'), exp) + + with tm.assert_produces_warning(expected_warning=FutureWarning): + # nested lists will be deprecated + tm.assert_series_equal(s.str.cat([t, u], join=join, na_rep='-'), + exp) # errors for incorrect lengths rgx = 'If `others` contains arrays or lists.*' - z = ['1', '2', '3'] + z = Series(['1', '2', '3']).values # unindexed object of wrong length with tm.assert_raises_regex(ValueError, rgx): @@ -442,8 +451,8 @@ def test_str_cat_special_cases(self): t = Series(['d', 'a', 'e', 'b'], index=[3, 0, 4, 1]) # iterator of elements with different types - exp = Series(['aaA', 'bbB', 'c-C', 'ddD', '-e-']) - tm.assert_series_equal(s.str.cat(iter([t, ['A', 'B', 'C', 'D']]), + exp = Series(['aaa', 'bbb', 'c-c', 'ddd', '-e-']) + tm.assert_series_equal(s.str.cat(iter([t, s.values]), join='outer', na_rep='-'), exp) # right-align with different indexes in others