Skip to content

Commit

Permalink
DEPR: execute deprecations for str.cat in v1.0 (#27611)
Browse files Browse the repository at this point in the history
  • Loading branch information
h-vetinari authored and jreback committed Jul 31, 2019
1 parent eb9a8e3 commit 9000c39
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 185 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ Removal of prior version deprecations/changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`)
- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to False (:issue:`27600`)
- :meth:`pandas.Series.str.cat` now defaults to aligning ``others``, using ``join='left'`` (:issue:`27611`)
- :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`)
-

.. _whatsnew_1000.performance:
Expand Down
175 changes: 44 additions & 131 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@
is_scalar,
is_string_like,
)
from pandas.core.dtypes.generic import ABCIndexClass, ABCMultiIndex, ABCSeries
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCIndexClass,
ABCMultiIndex,
ABCSeries,
)
from pandas.core.dtypes.missing import isna

from pandas.core.algorithms import take_1d
Expand Down Expand Up @@ -2061,130 +2066,64 @@ def cons_row(x):
cons = self._orig._constructor
return cons(result, name=name, index=index)

def _get_series_list(self, others, ignore_index=False):
def _get_series_list(self, others):
"""
Auxiliary function for :meth:`str.cat`. Turn potentially mixed input
into a list of Series (elements without an index must match the length
of the calling Series/Index).
Parameters
----------
others : Series, Index, DataFrame, np.ndarray, list-like or list-like
of objects that are Series, Index or np.ndarray (1-dim)
ignore_index : boolean, default False
Determines whether to forcefully align others with index of caller
others : Series, DataFrame, np.ndarray, list-like or list-like of
objects that are either Series, Index or np.ndarray (1-dim)
Returns
-------
tuple : (others transformed into list of Series,
boolean whether FutureWarning should be raised)
list : others transformed into list of Series
"""

# Once str.cat defaults to alignment, this function can be simplified;
# will not need `ignore_index` and the second boolean output anymore

from pandas import Series, DataFrame

# self._orig is either Series or Index
idx = self._orig if isinstance(self._orig, ABCIndexClass) else self._orig.index

err_msg = (
"others must be Series, Index, DataFrame, np.ndarray or "
"list-like (either containing only strings or containing "
"only objects of type Series/Index/list-like/np.ndarray)"
)

# Generally speaking, all objects without an index inherit the index
# `idx` of the calling Series/Index - i.e. must have matching length.
# Objects with an index (i.e. Series/Index/DataFrame) keep their own
# index, *unless* ignore_index is set to True.
# Objects with an index (i.e. Series/Index/DataFrame) keep their own.
if isinstance(others, ABCSeries):
warn = not others.index.equals(idx)
# only reconstruct Series when absolutely necessary
los = [
Series(others.values, index=idx) if ignore_index and warn else others
]
return (los, warn)
return [others]
elif isinstance(others, ABCIndexClass):
warn = not others.equals(idx)
los = [Series(others.values, index=(idx if ignore_index else others))]
return (los, warn)
elif isinstance(others, DataFrame):
warn = not others.index.equals(idx)
if ignore_index and warn:
# without copy, this could change "others"
# that was passed to str.cat
others = others.copy()
others.index = idx
return ([others[x] for x in others], warn)
return [Series(others.values, index=others)]
elif isinstance(others, ABCDataFrame):
return [others[x] for x in others]
elif isinstance(others, np.ndarray) and others.ndim == 2:
others = DataFrame(others, index=idx)
return ([others[x] for x in others], False)
return [others[x] for x in others]
elif is_list_like(others, allow_sets=False):
others = list(others) # ensure iterators do not get read twice etc

# in case of list-like `others`, all elements must be
# either one-dimensional list-likes or scalars
if all(is_list_like(x, allow_sets=False) for x in others):
# either Series/Index/np.ndarray (1-dim)...
if all(
isinstance(x, (ABCSeries, ABCIndexClass))
or (isinstance(x, np.ndarray) and x.ndim == 1)
for x in others
):
los = []
join_warn = False
depr_warn = False
# iterate through list and append list of series for each
# element (which we check to be one-dimensional and non-nested)
while others:
nxt = others.pop(0) # nxt is guaranteed list-like by above

# GH 21950 - DeprecationWarning
# only allowing Series/Index/np.ndarray[1-dim] will greatly
# simplify this function post-deprecation.
if not (
isinstance(nxt, (Series, ABCIndexClass))
or (isinstance(nxt, np.ndarray) and nxt.ndim == 1)
):
depr_warn = True

if not isinstance(
nxt, (DataFrame, Series, ABCIndexClass, np.ndarray)
):
# safety for non-persistent list-likes (e.g. iterators)
# do not map indexed/typed objects; info needed below
nxt = list(nxt)

# known types for which we can avoid deep inspection
no_deep = (
isinstance(nxt, np.ndarray) and nxt.ndim == 1
) or isinstance(nxt, (Series, ABCIndexClass))
# nested list-likes are forbidden:
# -> elements of nxt must not be list-like
is_legal = (no_deep and nxt.dtype == object) or all(
not is_list_like(x) for x in nxt
)

# DataFrame is false positive of is_legal
# because "x in df" returns column names
if not is_legal or isinstance(nxt, DataFrame):
raise TypeError(err_msg)

nxt, wnx = self._get_series_list(nxt, ignore_index=ignore_index)
los = los + nxt
join_warn = join_warn or wnx

if depr_warn:
warnings.warn(
"list-likes other than Series, Index, or "
"np.ndarray WITHIN another list-like are "
"deprecated and will be removed in a future "
"version.",
FutureWarning,
stacklevel=4,
)
return (los, join_warn)
while others: # iterate through list and append each element
los = los + self._get_series_list(others.pop(0))
return los
# ... or just strings
elif all(not is_list_like(x) for x in others):
return ([Series(others, index=idx)], False)
raise TypeError(err_msg)
return [Series(others, index=idx)]
raise TypeError(
"others must be Series, Index, DataFrame, np.ndarrary "
"or list-like (either containing only strings or "
"containing only objects of type Series/Index/"
"np.ndarray[1-dim])"
)

@forbid_nonstring_types(["bytes", "mixed", "mixed-integer"])
def cat(self, others=None, sep=None, na_rep=None, join=None):
def cat(self, others=None, sep=None, na_rep=None, join="left"):
"""
Concatenate strings in the Series/Index with given separator.
Expand Down Expand Up @@ -2218,16 +2157,15 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
- If `na_rep` is None, and `others` is not None, a row containing a
missing value in any of the columns (before concatenation) will
have a missing value in the result.
join : {'left', 'right', 'outer', 'inner'}, default None
join : {'left', 'right', 'outer', 'inner'}, default 'left'
Determines the join-style between the calling Series/Index and any
Series/Index/DataFrame in `others` (objects without an index need
to match the length of the calling Series/Index). If None,
alignment is disabled, but this option will be removed in a future
version of pandas and replaced with a default of `'left'`. To
disable alignment, use `.values` on any Series/Index/DataFrame in
`others`.
to match the length of the calling Series/Index). To disable
alignment, use `.values` on any Series/Index/DataFrame in `others`.
.. versionadded:: 0.23.0
.. versionchanged:: 1.0.0
Changed default of `join` from None to `'left'`.
Returns
-------
Expand Down Expand Up @@ -2343,39 +2281,14 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):

try:
# turn anything in "others" into lists of Series
others, warn = self._get_series_list(others, ignore_index=(join is None))
others = self._get_series_list(others)
except ValueError: # do not catch TypeError raised by _get_series_list
if join is None:
raise ValueError(
"All arrays must be same length, except "
"those having an index if `join` is not None"
)
else:
raise ValueError(
"If `others` contains arrays or lists (or "
"other list-likes without an index), these "
"must all be of the same length as the "
"calling Series/Index."
)

if join is None and warn:
warnings.warn(
"A future version of pandas will perform index "
"alignment when `others` is a Series/Index/"
"DataFrame (or a list-like containing one). To "
"disable alignment (the behavior before v.0.23) and "
"silence this warning, use `.values` on any Series/"
"Index/DataFrame in `others`. To enable alignment "
"and silence this warning, pass `join='left'|"
"'outer'|'inner'|'right'`. The future default will "
"be `join='left'`.",
FutureWarning,
stacklevel=3,
raise ValueError(
"If `others` contains arrays or lists (or other "
"list-likes without an index), these must all be "
"of the same length as the calling Series/Index."
)

# if join is None, _get_series_list already force-aligned indexes
join = "left" if join is None else join

# align if required
if any(not data.index.equals(x.index) for x in others):
# Need to add keys for uniqueness in case of duplicate columns
Expand Down
Loading

0 comments on commit 9000c39

Please sign in to comment.