From 9a42cbe85461c28417a5130bc80b035044c5575a Mon Sep 17 00:00:00 2001
From: h-vetinari <33685575+h-vetinari@users.noreply.github.com>
Date: Sat, 1 Jun 2019 17:03:06 +0200
Subject: [PATCH] API: Series.str-accessor infers dtype (and Index.str does not
 raise on all-NA) (#23167)

---
 doc/source/user_guide/text.rst  |  10 ++
 doc/source/whatsnew/v0.25.0.rst |  40 +++++-
 pandas/core/strings.py          | 214 +++++++++++++++++++++++++-------
 pandas/tests/test_strings.py    |  48 +++----
 4 files changed, 233 insertions(+), 79 deletions(-)

diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst
index f7fdfcf8bf882..87c75e8bcd91f 100644
--- a/doc/source/user_guide/text.rst
+++ b/doc/source/user_guide/text.rst
@@ -70,6 +70,16 @@ and replacing any remaining whitespaces with underscores:
     ``.str`` methods which operate on elements of type ``list`` are not available on such a
     ``Series``.
 
+.. _text.warn_types:
+
+.. warning::
+
+    Before v.0.25.0, the ``.str``-accessor did only the most rudimentary type checks. Starting with
+    v.0.25.0, the type of the Series is inferred and the allowed types (i.e. strings) are enforced more rigorously.
+
+    Generally speaking, the ``.str`` accessor is intended to work only on strings. With very few
+    exceptions, other uses are not supported, and may be disabled at a later point.
+
 
 Splitting and Replacing Strings
 -------------------------------
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 3275223b159f8..87a8010998bd0 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -231,6 +231,43 @@ returned if all the columns were dummy encoded, and a :class:`DataFrame` otherwi
 Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will
 cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before.
 
+The ``.str``-accessor performs stricter type checks
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Due to the lack of more fine-grained dtypes, :attr:`Series.str` so far only checked whether the data was
+of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* the Series; in particular,
+``'bytes'``-only data will raise an exception (except for :meth:`Series.str.decode`, :meth:`Series.str.get`,
+:meth:`Series.str.len`, :meth:`Series.str.slice`), see :issue:`23163`, :issue:`23011`, :issue:`23551`.
+
+*Previous Behaviour*:
+
+.. code-block:: python
+
+    In [1]: s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object)
+
+    In [2]: s
+    Out[2]:
+    0      b'a'
+    1     b'ba'
+    2    b'cba'
+    dtype: object
+
+    In [3]: s.str.startswith(b'a')
+    Out[3]:
+    0     True
+    1    False
+    2    False
+    dtype: bool
+
+*New Behaviour*:
+
+.. ipython:: python
+    :okexcept:
+
+    s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object)
+    s
+    s.str.startswith(b'a')
+
 .. _whatsnew_0250.api_breaking.incompatible_index_unions
 
 Incompatible Index Type Unions
@@ -331,7 +368,6 @@ This change is backward compatible for direct usage of Pandas, but if you subcla
 Pandas objects *and* give your subclasses specific ``__str__``/``__repr__`` methods,
 you may have to adjust your ``__str__``/``__repr__`` methods (:issue:`26495`).
 
-
 .. _whatsnew_0250.api_breaking.deps:
 
 Increased minimum versions for dependencies
@@ -537,7 +573,7 @@ Conversion
 Strings
 ^^^^^^^
 
--
+- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`)
 -
 -
 
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index ee3796241690d..bd756491abd2f 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -1,4 +1,5 @@
 import codecs
+from functools import wraps
 import re
 import textwrap
 from typing import Dict
@@ -12,8 +13,8 @@
 
 from pandas.core.dtypes.common import (
     ensure_object, is_bool_dtype, is_categorical_dtype, is_integer,
-    is_list_like, is_object_dtype, is_re, is_scalar, is_string_like)
-from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
+    is_list_like, is_re, is_scalar, is_string_like)
+from pandas.core.dtypes.generic import ABCIndexClass, ABCMultiIndex, ABCSeries
 from pandas.core.dtypes.missing import isna
 
 from pandas.core.algorithms import take_1d
@@ -1720,12 +1721,78 @@ def str_encode(arr, encoding, errors="strict"):
     return _na_map(f, arr)
 
 
-def _noarg_wrapper(f, docstring=None, **kargs):
+def forbid_nonstring_types(forbidden, name=None):
+    """
+    Decorator to forbid specific types for a method of StringMethods.
+
+    For calling `.str.{method}` on a Series or Index, it is necessary to first
+    initialize the :class:`StringMethods` object, and then call the method.
+    However, different methods allow different input types, and so this can not
+    be checked during :meth:`StringMethods.__init__`, but must be done on a
+    per-method basis. This decorator exists to facilitate this process, and
+    make it explicit which (inferred) types are disallowed by the method.
+
+    :meth:`StringMethods.__init__` allows the *union* of types its different
+    methods allow (after skipping NaNs; see :meth:`StringMethods._validate`),
+    namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer'].
+
+    The default string types ['string', 'empty'] are allowed for all methods.
+    For the additional types ['bytes', 'mixed', 'mixed-integer'], each method
+    then needs to forbid the types it is not intended for.
+
+    Parameters
+    ----------
+    forbidden : list-of-str or None
+        List of forbidden non-string types, may be one or more of
+        `['bytes', 'mixed', 'mixed-integer']`.
+    name : str, default None
+        Name of the method to use in the error message. By default, this is
+        None, in which case the name from the method being wrapped will be
+        copied. However, for working with further wrappers (like _pat_wrapper
+        and _noarg_wrapper), it is necessary to specify the name.
+
+    Returns
+    -------
+    func : wrapper
+        The method to which the decorator is applied, with an added check that
+        enforces the inferred type to not be in the list of forbidden types.
+
+    Raises
+    ------
+    TypeError
+        If the inferred type of the underlying data is in `forbidden`.
+    """
+
+    # deal with None
+    forbidden = [] if forbidden is None else forbidden
+
+    allowed_types = {'string', 'empty', 'bytes',
+                     'mixed', 'mixed-integer'} - set(forbidden)
+
+    def _forbid_nonstring_types(func):
+        func_name = func.__name__ if name is None else name
+
+        @wraps(func)
+        def wrapper(self, *args, **kwargs):
+            if self._inferred_dtype not in allowed_types:
+                msg = ('Cannot use .str.{name} with values of inferred dtype '
+                       '{inf_type!r}.'.format(name=func_name,
+                                              inf_type=self._inferred_dtype))
+                raise TypeError(msg)
+            return func(self, *args, **kwargs)
+        wrapper.__name__ = func_name
+        return wrapper
+    return _forbid_nonstring_types
+
+
+def _noarg_wrapper(f, name=None, docstring=None, forbidden_types=['bytes'],
+                   **kargs):
+    @forbid_nonstring_types(forbidden_types, name=name)
     def wrapper(self):
         result = _na_map(f, self._parent, **kargs)
         return self._wrap_result(result)
 
-    wrapper.__name__ = f.__name__
+    wrapper.__name__ = f.__name__ if name is None else name
     if docstring is not None:
         wrapper.__doc__ = docstring
     else:
@@ -1734,22 +1801,26 @@ def wrapper(self):
     return wrapper
 
 
-def _pat_wrapper(f, flags=False, na=False, **kwargs):
+def _pat_wrapper(f, flags=False, na=False, name=None,
+                 forbidden_types=['bytes'], **kwargs):
+    @forbid_nonstring_types(forbidden_types, name=name)
     def wrapper1(self, pat):
         result = f(self._parent, pat)
         return self._wrap_result(result)
 
+    @forbid_nonstring_types(forbidden_types, name=name)
     def wrapper2(self, pat, flags=0, **kwargs):
         result = f(self._parent, pat, flags=flags, **kwargs)
         return self._wrap_result(result)
 
+    @forbid_nonstring_types(forbidden_types, name=name)
     def wrapper3(self, pat, na=np.nan):
         result = f(self._parent, pat, na=na)
         return self._wrap_result(result)
 
     wrapper = wrapper3 if na else wrapper2 if flags else wrapper1
 
-    wrapper.__name__ = f.__name__
+    wrapper.__name__ = f.__name__ if name is None else name
     if f.__doc__:
         wrapper.__doc__ = f.__doc__
 
@@ -1780,7 +1851,7 @@ class StringMethods(NoNewAttributesMixin):
     """
 
     def __init__(self, data):
-        self._validate(data)
+        self._inferred_dtype = self._validate(data)
         self._is_categorical = is_categorical_dtype(data)
 
         # .values.categories works for both Series/Index
@@ -1791,38 +1862,44 @@ def __init__(self, data):
 
     @staticmethod
     def _validate(data):
-        from pandas.core.index import Index
-
-        if (isinstance(data, ABCSeries) and
-                not ((is_categorical_dtype(data.dtype) and
-                      is_object_dtype(data.values.categories)) or
-                     (is_object_dtype(data.dtype)))):
-            # it's neither a string series not a categorical series with
-            # strings inside the categories.
-            # this really should exclude all series with any non-string values
-            # (instead of test for object dtype), but that isn't practical for
-            # performance reasons until we have a str dtype (GH 9343)
+        """
+        Auxiliary function for StringMethods, infers and checks dtype of data.
+
+        This is a "first line of defence" at the creation of the StringMethods-
+        object (see _make_accessor), and just checks that the dtype is in the
+        *union* of the allowed types over all string methods below; this
+        restriction is then refined on a per-method basis using the decorator
+        @forbid_nonstring_types (more info in the corresponding docstring).
+
+        This really should exclude all series/index with any non-string values,
+        but that isn't practical for performance reasons until we have a str
+        dtype (GH 9343 / 13877)
+
+        Parameters
+        ----------
+        data : The content of the Series
+
+        Returns
+        -------
+        dtype : inferred dtype of data
+        """
+        if isinstance(data, ABCMultiIndex):
+            raise AttributeError('Can only use .str accessor with Index, '
+                                 'not MultiIndex')
+
+        # see _libs/lib.pyx for list of inferred types
+        allowed_types = ['string', 'empty', 'bytes', 'mixed', 'mixed-integer']
+
+        values = getattr(data, 'values', data)  # Series / Index
+        values = getattr(values, 'categories', values)  # categorical / normal
+
+        # missing values obfuscate type inference -> skip
+        inferred_dtype = lib.infer_dtype(values, skipna=True)
+
+        if inferred_dtype not in allowed_types:
             raise AttributeError("Can only use .str accessor with string "
-                                 "values, which use np.object_ dtype in "
-                                 "pandas")
-        elif isinstance(data, Index):
-            # can't use ABCIndex to exclude non-str
-
-            # see src/inference.pyx which can contain string values
-            allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
-            if is_categorical_dtype(data.dtype):
-                inf_type = data.categories.inferred_type
-            else:
-                inf_type = data.inferred_type
-            if inf_type not in allowed_types:
-                message = ("Can only use .str accessor with string values "
-                           "(i.e. inferred_type is 'string', 'unicode' or "
-                           "'mixed')")
-                raise AttributeError(message)
-            if data.nlevels > 1:
-                message = ("Can only use .str accessor with Index, not "
-                           "MultiIndex")
-                raise AttributeError(message)
+                                 "values!")
+        return inferred_dtype
 
     def __getitem__(self, key):
         if isinstance(key, slice):
@@ -2025,12 +2102,13 @@ def _get_series_list(self, others, ignore_index=False):
                     warnings.warn('list-likes other than Series, Index, or '
                                   'np.ndarray WITHIN another list-like are '
                                   'deprecated and will be removed in a future '
-                                  'version.', FutureWarning, stacklevel=3)
+                                  'version.', FutureWarning, stacklevel=4)
                 return (los, join_warn)
             elif all(not is_list_like(x) for x in others):
                 return ([Series(others, index=idx)], False)
         raise TypeError(err_msg)
 
+    @forbid_nonstring_types(['bytes', 'mixed', 'mixed-integer'])
     def cat(self, others=None, sep=None, na_rep=None, join=None):
         """
         Concatenate strings in the Series/Index with given separator.
@@ -2211,7 +2289,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
                           "Index/DataFrame in `others`. To enable alignment "
                           "and silence this warning, pass `join='left'|"
                           "'outer'|'inner'|'right'`. The future default will "
-                          "be `join='left'`.", FutureWarning, stacklevel=2)
+                          "be `join='left'`.", FutureWarning, stacklevel=3)
 
         # if join is None, _get_series_list already force-aligned indexes
         join = 'left' if join is None else join
@@ -2384,6 +2462,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
     @Appender(_shared_docs['str_split'] % {
         'side': 'beginning',
         'method': 'split'})
+    @forbid_nonstring_types(['bytes'])
     def split(self, pat=None, n=-1, expand=False):
         result = str_split(self._parent, pat, n=n)
         return self._wrap_result(result, expand=expand)
@@ -2391,6 +2470,7 @@ def split(self, pat=None, n=-1, expand=False):
     @Appender(_shared_docs['str_split'] % {
         'side': 'end',
         'method': 'rsplit'})
+    @forbid_nonstring_types(['bytes'])
     def rsplit(self, pat=None, n=-1, expand=False):
         result = str_rsplit(self._parent, pat, n=n)
         return self._wrap_result(result, expand=expand)
@@ -2485,6 +2565,7 @@ def rsplit(self, pat=None, n=-1, expand=False):
                 '`sep`.'
     })
     @deprecate_kwarg(old_arg_name='pat', new_arg_name='sep')
+    @forbid_nonstring_types(['bytes'])
     def partition(self, sep=' ', expand=True):
         f = lambda x: x.partition(sep)
         result = _na_map(f, self._parent)
@@ -2498,6 +2579,7 @@ def partition(self, sep=' ', expand=True):
                 '`sep`.'
     })
     @deprecate_kwarg(old_arg_name='pat', new_arg_name='sep')
+    @forbid_nonstring_types(['bytes'])
     def rpartition(self, sep=' ', expand=True):
         f = lambda x: x.rpartition(sep)
         result = _na_map(f, self._parent)
@@ -2509,33 +2591,39 @@ def get(self, i):
         return self._wrap_result(result)
 
     @copy(str_join)
+    @forbid_nonstring_types(['bytes'])
     def join(self, sep):
         result = str_join(self._parent, sep)
         return self._wrap_result(result)
 
     @copy(str_contains)
+    @forbid_nonstring_types(['bytes'])
     def contains(self, pat, case=True, flags=0, na=np.nan, regex=True):
         result = str_contains(self._parent, pat, case=case, flags=flags, na=na,
                               regex=regex)
         return self._wrap_result(result, fill_value=na)
 
     @copy(str_match)
+    @forbid_nonstring_types(['bytes'])
     def match(self, pat, case=True, flags=0, na=np.nan):
         result = str_match(self._parent, pat, case=case, flags=flags, na=na)
         return self._wrap_result(result, fill_value=na)
 
     @copy(str_replace)
+    @forbid_nonstring_types(['bytes'])
     def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
         result = str_replace(self._parent, pat, repl, n=n, case=case,
                              flags=flags, regex=regex)
         return self._wrap_result(result)
 
     @copy(str_repeat)
+    @forbid_nonstring_types(['bytes'])
     def repeat(self, repeats):
         result = str_repeat(self._parent, repeats)
         return self._wrap_result(result)
 
     @copy(str_pad)
+    @forbid_nonstring_types(['bytes'])
     def pad(self, width, side='left', fillchar=' '):
         result = str_pad(self._parent, width, side=side, fillchar=fillchar)
         return self._wrap_result(result)
@@ -2559,17 +2647,21 @@ def pad(self, width, side='left', fillchar=' '):
 
     @Appender(_shared_docs['str_pad'] % dict(side='left and right',
                                              method='center'))
+    @forbid_nonstring_types(['bytes'])
     def center(self, width, fillchar=' '):
         return self.pad(width, side='both', fillchar=fillchar)
 
     @Appender(_shared_docs['str_pad'] % dict(side='right', method='ljust'))
+    @forbid_nonstring_types(['bytes'])
     def ljust(self, width, fillchar=' '):
         return self.pad(width, side='right', fillchar=fillchar)
 
     @Appender(_shared_docs['str_pad'] % dict(side='left', method='rjust'))
+    @forbid_nonstring_types(['bytes'])
     def rjust(self, width, fillchar=' '):
         return self.pad(width, side='left', fillchar=fillchar)
 
+    @forbid_nonstring_types(['bytes'])
     def zfill(self, width):
         """
         Pad strings in the Series/Index by prepending '0' characters.
@@ -2639,16 +2731,19 @@ def slice(self, start=None, stop=None, step=None):
         return self._wrap_result(result)
 
     @copy(str_slice_replace)
+    @forbid_nonstring_types(['bytes'])
     def slice_replace(self, start=None, stop=None, repl=None):
         result = str_slice_replace(self._parent, start, stop, repl)
         return self._wrap_result(result)
 
     @copy(str_decode)
     def decode(self, encoding, errors="strict"):
+        # need to allow bytes here
         result = str_decode(self._parent, encoding, errors)
         return self._wrap_result(result)
 
     @copy(str_encode)
+    @forbid_nonstring_types(['bytes'])
     def encode(self, encoding, errors="strict"):
         result = str_encode(self._parent, encoding, errors)
         return self._wrap_result(result)
@@ -2718,28 +2813,33 @@ def encode(self, encoding, errors="strict"):
 
     @Appender(_shared_docs['str_strip'] % dict(side='left and right sides',
                                                method='strip'))
+    @forbid_nonstring_types(['bytes'])
     def strip(self, to_strip=None):
         result = str_strip(self._parent, to_strip, side='both')
         return self._wrap_result(result)
 
     @Appender(_shared_docs['str_strip'] % dict(side='left side',
                                                method='lstrip'))
+    @forbid_nonstring_types(['bytes'])
     def lstrip(self, to_strip=None):
         result = str_strip(self._parent, to_strip, side='left')
         return self._wrap_result(result)
 
     @Appender(_shared_docs['str_strip'] % dict(side='right side',
                                                method='rstrip'))
+    @forbid_nonstring_types(['bytes'])
     def rstrip(self, to_strip=None):
         result = str_strip(self._parent, to_strip, side='right')
         return self._wrap_result(result)
 
     @copy(str_wrap)
+    @forbid_nonstring_types(['bytes'])
     def wrap(self, width, **kwargs):
         result = str_wrap(self._parent, width, **kwargs)
         return self._wrap_result(result)
 
     @copy(str_get_dummies)
+    @forbid_nonstring_types(['bytes'])
     def get_dummies(self, sep='|'):
         # we need to cast to Series of strings as only that has all
         # methods available for making the dummies...
@@ -2749,20 +2849,23 @@ def get_dummies(self, sep='|'):
                                  name=name, expand=True)
 
     @copy(str_translate)
+    @forbid_nonstring_types(['bytes'])
     def translate(self, table):
         result = str_translate(self._parent, table)
         return self._wrap_result(result)
 
-    count = _pat_wrapper(str_count, flags=True)
-    startswith = _pat_wrapper(str_startswith, na=True)
-    endswith = _pat_wrapper(str_endswith, na=True)
-    findall = _pat_wrapper(str_findall, flags=True)
+    count = _pat_wrapper(str_count, flags=True, name='count')
+    startswith = _pat_wrapper(str_startswith, na=True, name='startswith')
+    endswith = _pat_wrapper(str_endswith, na=True, name='endswith')
+    findall = _pat_wrapper(str_findall, flags=True, name='findall')
 
     @copy(str_extract)
+    @forbid_nonstring_types(['bytes'])
     def extract(self, pat, flags=0, expand=True):
         return str_extract(self, pat, flags=flags, expand=expand)
 
     @copy(str_extractall)
+    @forbid_nonstring_types(['bytes'])
     def extractall(self, pat, flags=0):
         return str_extractall(self._orig, pat, flags=flags)
 
@@ -2792,6 +2895,7 @@ def extractall(self, pat, flags=0):
     @Appender(_shared_docs['find'] %
               dict(side='lowest', method='find',
                    also='rfind : Return highest indexes in each strings.'))
+    @forbid_nonstring_types(['bytes'])
     def find(self, sub, start=0, end=None):
         result = str_find(self._parent, sub, start=start, end=end, side='left')
         return self._wrap_result(result)
@@ -2799,11 +2903,13 @@ def find(self, sub, start=0, end=None):
     @Appender(_shared_docs['find'] %
               dict(side='highest', method='rfind',
                    also='find : Return lowest indexes in each strings.'))
+    @forbid_nonstring_types(['bytes'])
     def rfind(self, sub, start=0, end=None):
         result = str_find(self._parent, sub,
                           start=start, end=end, side='right')
         return self._wrap_result(result)
 
+    @forbid_nonstring_types(['bytes'])
     def normalize(self, form):
         """
         Return the Unicode normal form for the strings in the Series/Index.
@@ -2851,6 +2957,7 @@ def normalize(self, form):
     @Appender(_shared_docs['index'] %
               dict(side='lowest', similar='find', method='index',
                    also='rindex : Return highest indexes in each strings.'))
+    @forbid_nonstring_types(['bytes'])
     def index(self, sub, start=0, end=None):
         result = str_index(self._parent, sub,
                            start=start, end=end, side='left')
@@ -2859,6 +2966,7 @@ def index(self, sub, start=0, end=None):
     @Appender(_shared_docs['index'] %
               dict(side='highest', similar='rfind', method='rindex',
                    also='index : Return lowest indexes in each strings.'))
+    @forbid_nonstring_types(['bytes'])
     def rindex(self, sub, start=0, end=None):
         result = str_index(self._parent, sub,
                            start=start, end=end, side='right')
@@ -2908,7 +3016,8 @@ def rindex(self, sub, start=0, end=None):
     5    3.0
     dtype: float64
     """)
-    len = _noarg_wrapper(len, docstring=_shared_docs['len'], dtype=int)
+    len = _noarg_wrapper(len, docstring=_shared_docs['len'],
+                         forbidden_types=None, dtype=int)
 
     _shared_docs['casemethods'] = ("""
     Convert strings in the Series/Index to %(type)s.
@@ -2989,21 +3098,27 @@ def rindex(self, sub, start=0, end=None):
     _doc_args['casefold'] = dict(type='be casefolded', method='casefold',
                                  version='\n    .. versionadded:: 0.25.0\n')
     lower = _noarg_wrapper(lambda x: x.lower(),
+                           name='lower',
                            docstring=_shared_docs['casemethods'] %
                            _doc_args['lower'])
     upper = _noarg_wrapper(lambda x: x.upper(),
+                           name='upper',
                            docstring=_shared_docs['casemethods'] %
                            _doc_args['upper'])
     title = _noarg_wrapper(lambda x: x.title(),
+                           name='title',
                            docstring=_shared_docs['casemethods'] %
                            _doc_args['title'])
     capitalize = _noarg_wrapper(lambda x: x.capitalize(),
+                                name='capitalize',
                                 docstring=_shared_docs['casemethods'] %
                                 _doc_args['capitalize'])
     swapcase = _noarg_wrapper(lambda x: x.swapcase(),
+                              name='swapcase',
                               docstring=_shared_docs['casemethods'] %
                               _doc_args['swapcase'])
     casefold = _noarg_wrapper(lambda x: x.casefold(),
+                              name='casefold',
                               docstring=_shared_docs['casemethods'] %
                               _doc_args['casefold'])
 
@@ -3157,30 +3272,39 @@ def rindex(self, sub, start=0, end=None):
     _doc_args['isnumeric'] = dict(type='numeric', method='isnumeric')
     _doc_args['isdecimal'] = dict(type='decimal', method='isdecimal')
     isalnum = _noarg_wrapper(lambda x: x.isalnum(),
+                             name='isalnum',
                              docstring=_shared_docs['ismethods'] %
                              _doc_args['isalnum'])
     isalpha = _noarg_wrapper(lambda x: x.isalpha(),
+                             name='isalpha',
                              docstring=_shared_docs['ismethods'] %
                              _doc_args['isalpha'])
     isdigit = _noarg_wrapper(lambda x: x.isdigit(),
+                             name='isdigit',
                              docstring=_shared_docs['ismethods'] %
                              _doc_args['isdigit'])
     isspace = _noarg_wrapper(lambda x: x.isspace(),
+                             name='isspace',
                              docstring=_shared_docs['ismethods'] %
                              _doc_args['isspace'])
     islower = _noarg_wrapper(lambda x: x.islower(),
+                             name='islower',
                              docstring=_shared_docs['ismethods'] %
                              _doc_args['islower'])
     isupper = _noarg_wrapper(lambda x: x.isupper(),
+                             name='isupper',
                              docstring=_shared_docs['ismethods'] %
                              _doc_args['isupper'])
     istitle = _noarg_wrapper(lambda x: x.istitle(),
+                             name='istitle',
                              docstring=_shared_docs['ismethods'] %
                              _doc_args['istitle'])
     isnumeric = _noarg_wrapper(lambda x: x.isnumeric(),
+                               name='isnumeric',
                                docstring=_shared_docs['ismethods'] %
                                _doc_args['isnumeric'])
     isdecimal = _noarg_wrapper(lambda x: x.isdecimal(),
+                               name='isdecimal',
                                docstring=_shared_docs['ismethods'] %
                                _doc_args['isdecimal'])
 
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index 2951ca24fa7ff..1ba0ef3918fb7 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -150,6 +150,9 @@ def any_allowed_skipna_inferred_dtype(request):
     ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
     ...     # will pass
     ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
+    ...
+    ...     # constructor for .str-accessor will also pass
+    ...     pd.Series(values).str
     """
     inferred_dtype, values = request.param
     values = np.array(values, dtype=object)  # object dtype to avoid casting
@@ -179,20 +182,6 @@ def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype):
             pytest.xfail(reason='Conversion to numpy array fails because '
                          'the ._values-attribute is not a numpy array for '
                          'PeriodArray/IntervalArray; see GH 23553')
-        if box == Index and inferred_dtype in ['empty', 'bytes']:
-            pytest.xfail(reason='Raising too restrictively; '
-                         'solved by GH 23167')
-        if (box == Index and dtype == object
-                and inferred_dtype in ['boolean', 'date', 'time']):
-            pytest.xfail(reason='Inferring incorrectly because of NaNs; '
-                         'solved by GH 23167')
-        if (box == Series
-                and (dtype == object and inferred_dtype not in [
-                    'string', 'unicode', 'empty',
-                    'bytes', 'mixed', 'mixed-integer'])
-                or (dtype == 'category'
-                    and inferred_dtype in ['decimal', 'boolean', 'time'])):
-            pytest.xfail(reason='Not raising correctly; solved by GH 23167')
 
         types_passing_constructor = ['string', 'unicode', 'empty',
                                      'bytes', 'mixed', 'mixed-integer']
@@ -220,27 +209,21 @@ def test_api_per_method(self, box, dtype,
         method_name, args, kwargs = any_string_method
 
         # TODO: get rid of these xfails
-        if (method_name not in ['encode', 'decode', 'len']
-                and inferred_dtype == 'bytes'):
-            pytest.xfail(reason='Not raising for "bytes", see GH 23011;'
-                         'Also: malformed method names, see GH 23551; '
-                         'solved by GH 23167')
-        if (method_name == 'cat'
-                and inferred_dtype in ['mixed', 'mixed-integer']):
-            pytest.xfail(reason='Bad error message; should raise better; '
-                         'solved by GH 23167')
-        if box == Index and inferred_dtype in ['empty', 'bytes']:
-            pytest.xfail(reason='Raising too restrictively; '
-                         'solved by GH 23167')
-        if (box == Index and dtype == object
-                and inferred_dtype in ['boolean', 'date', 'time']):
-            pytest.xfail(reason='Inferring incorrectly because of NaNs; '
-                         'solved by GH 23167')
+        if (method_name in ['partition', 'rpartition'] and box == Index
+                and inferred_dtype == 'empty'):
+            pytest.xfail(reason='Method cannot deal with empty Index')
+        if (method_name == 'split' and box == Index and values.size == 0
+                and kwargs.get('expand', None) is not None):
+            pytest.xfail(reason='Split fails on empty Series when expand=True')
+        if (method_name == 'get_dummies' and box == Index
+                and inferred_dtype == 'empty' and (dtype == object
+                                                   or values.size == 0)):
+            pytest.xfail(reason='Need to fortify get_dummies corner cases')
 
         t = box(values, dtype=dtype)  # explicit dtype to avoid casting
         method = getattr(t.str, method_name)
 
-        bytes_allowed = method_name in ['encode', 'decode', 'len']
+        bytes_allowed = method_name in ['decode', 'get', 'len', 'slice']
         # as of v0.23.4, all methods except 'cat' are very lenient with the
         # allowed data types, just returning NaN for entries that error.
         # This could be changed with an 'errors'-kwarg to the `str`-accessor,
@@ -3167,7 +3150,8 @@ def test_str_accessor_no_new_attributes(self):
     def test_method_on_bytes(self):
         lhs = Series(np.array(list('abc'), 'S1').astype(object))
         rhs = Series(np.array(list('def'), 'S1').astype(object))
-        with pytest.raises(TypeError, match="can't concat str to bytes"):
+        with pytest.raises(TypeError,
+                           match="Cannot use .str.cat with values of.*"):
             lhs.str.cat(rhs)
 
     def test_casefold(self):