pandas-dev · jreback · Dec 4, 2018 · Nov 9, 2018 · Nov 9, 2018 · Nov 9, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -857,6 +857,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - :meth:`DataFrame.stack` no longer converts to object dtype for DataFrames where each column has the same extension dtype. The output Series will have the same dtype as the columns (:issue:`23077`).
 - :meth:`Series.unstack` and :meth:`DataFrame.unstack` no longer convert extension arrays to object-dtype ndarrays. Each column in the output ``DataFrame`` will now have the same dtype as the input (:issue:`23077`).
 - Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it was not returning the actual ``ExtensionArray`` dtype (:issue:`23227`).
+- A default repr for ExtensionArrays is now provided (:issue:`23601`).
 
 .. _whatsnew_0240.api.incompatibilities:
 
@@ -966,6 +967,7 @@ Deprecations
 - The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`)
 - Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of
   `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`)
+- :meth:`ExtensionArray._formatting_values` is deprecated. Use `ExtensionArray._formatter` instead. (:issue:`23601`)
 
 .. _whatsnew_0240.deprecations.datetimelike_int_ops:
 
@@ -1118,6 +1120,7 @@ Datetimelike
 - Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`)
 - Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`)
 - Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`)
+- Bug in the :class:`Series` repr with Period data missing a space before the data (:issue:`23601`)
 - Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`)
 - Bug in :meth:`Series.min` which would return ``NaN`` instead of ``NaT`` when called on a series of ``NaT`` (:issue:`23282`)
 - Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -49,6 +49,13 @@ class ExtensionArray(object):
 
     * _formatting_values
 
+    A default repr displaying the type, (truncated) data, length,
+    and dtype is provided. It can be customized or replaced by
+    by overriding:
+
+    * _formatter
+    * __repr__
+
     Some methods require casting the ExtensionArray to an ndarray of Python
     objects with ``self.astype(object)``, which may be expensive. When
     performance is a concern, we highly recommend overriding the following
@@ -653,15 +660,60 @@ def copy(self, deep=False):
         raise AbstractMethodError(self)
 
     # ------------------------------------------------------------------------
-    # Block-related methods
+    # Printing
     # ------------------------------------------------------------------------
 
+    def __repr__(self):
+        from pandas.io.formats.printing import format_object_summary
+
+        template = (
+            '<{class_name}>\n'
+            '{data}\n'
+            'Length: {length}, dtype: {dtype}'
+        )
+        # the short repr has no trailing newline, while the truncated
+        # repr does. So we include a newline in our template, and strip
+        # any trailing newlines from format_object_summary
+        data = format_object_summary(self, self._formatter(), name=False,
+                                     trailing_comma=False).rstrip('\n')
+        name = self.__class__.__name__
+        return template.format(class_name=name, data=data,
+                               length=len(self),
+                               dtype=self.dtype)
+
+    def _formatter(self, formatter=None):
+        # type: (Optional[ExtensionArrayFormatter]) -> Callable[[Any], str]
+        """Formatting function for scalar values.
+
+        This is used in the default '__repr__'. The formatting function
+        receives instances of your scalar type.
+
+        Parameters
+        ----------
+        formatter: GenericArrayFormatter, optional
+            The formatter this array is being rendered with. The formatter
+            may have a `.formatter` method already defined. By default, this
+            will be used if a `formatter` is passed, otherwise the formatter
+            is ``str``.
+
+        Returns
+        -------
+        Callable[[Any], str]
+            A callable that gets instances of the scalar type and
+            returns a string.
+        """
+        return getattr(formatter, 'formatter', None) or str
+
     def _formatting_values(self):
         # type: () -> np.ndarray
         # At the moment, this has to be an array since we use result.dtype
         """An array of values to be printed in, e.g. the Series repr"""
         return np.array(self)
 
+    # ------------------------------------------------------------------------
+    # Reshaping
+    # ------------------------------------------------------------------------
+
     @classmethod
     def _concat_same_type(cls, to_concat):
         # type: (Sequence[ExtensionArray]) -> ExtensionArray

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -499,6 +499,10 @@ def _constructor(self):
     def _from_sequence(cls, scalars, dtype=None, copy=False):
         return Categorical(scalars, dtype=dtype)
 
+    def _formatter(self, formatter):
+        # backwards compat with old printing.
+        return None
+
     def copy(self):
         """ Copy constructor. """
         return self._constructor(values=self._codes.copy(),
@@ -1986,6 +1990,8 @@ def __unicode__(self):
 
         return result
 
+    __repr__ = __unicode__
+
     def _maybe_coerce_indexer(self, indexer):
         """ return an indexer coerced to the codes dtype """
         if isinstance(indexer, np.ndarray) and indexer.dtype.kind == 'i':
@@ -2342,9 +2348,6 @@ def _concat_same_type(self, to_concat):
 
         return _concat_categorical(to_concat)
 
-    def _formatting_values(self):
-        return self
-
     def isin(self, values):
         """
         Check whether `values` are contained in Categorical.

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -6,7 +6,7 @@
 
 from pandas._libs import lib
 from pandas.util._decorators import cache_readonly
-from pandas.compat import u, range, string_types
+from pandas.compat import range, string_types
 from pandas.compat import set_function_name
 
 from pandas.core import nanops
@@ -24,9 +24,6 @@
 from pandas.core.dtypes.dtypes import register_extension_dtype
 from pandas.core.dtypes.missing import isna, notna
 
-from pandas.io.formats.printing import (
-    format_object_summary, format_object_attrs, default_pprint)
-
 
 class _IntegerDtype(ExtensionDtype):
     """
@@ -267,6 +264,15 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
     def _from_factorized(cls, values, original):
         return integer_array(values, dtype=original.dtype)
 
+    def _formatter(self, formatter=None):
+        if formatter is None:
+            def fmt(x):
+                if isna(x):
+                    return 'NaN'
+                return str(x)
+            return fmt
+        return formatter.formatter
+
     def __getitem__(self, item):
         if is_integer(item):
             if self._mask[item]:
@@ -300,10 +306,6 @@ def __iter__(self):
             else:
                 yield self._data[i]
 
-    def _formatting_values(self):
-        # type: () -> np.ndarray
-        return self._coerce_to_ndarray()
-
     def take(self, indexer, allow_fill=False, fill_value=None):
         from pandas.api.extensions import take
 
@@ -353,25 +355,6 @@ def __setitem__(self, key, value):
     def __len__(self):
         return len(self._data)
 
-    def __repr__(self):
-        """
-        Return a string representation for this object.
-
-        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
-        py2/py3.
-        """
-        klass = self.__class__.__name__
-        data = format_object_summary(self, default_pprint, False)
-        attrs = format_object_attrs(self)
-        space = " "
-
-        prepr = (u(",%s") %
-                 space).join(u("%s=%s") % (k, v) for k, v in attrs)
-
-        res = u("%s(%s%s)") % (klass, data, prepr)
-
-        return res
-
     @property
     def nbytes(self):
         return self._data.nbytes + self._mask.nbytes

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -689,9 +689,6 @@ def copy(self, deep=False):
         # TODO: Could skip verify_integrity here.
         return type(self).from_arrays(left, right, closed=closed)
 
-    def _formatting_values(self):
-        return np.asarray(self)
-
     def isna(self):
         return isna(self.left)
 

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -330,13 +330,10 @@ def start_time(self):
     def end_time(self):
         return self.to_timestamp(how='end')
 
-    def __repr__(self):
-        return '<{}>\n{}\nLength: {}, dtype: {}'.format(
-            self.__class__.__name__,
-            [str(s) for s in self],
-            len(self),
-            self.dtype
-        )
+    def _formatter(self, formatter=None):
+        if formatter:
+            return formatter.formatter or str
+        return "'{}'".format
 
     def __setitem__(
             self,

diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
@@ -1661,6 +1661,16 @@ def __unicode__(self):
             fill=printing.pprint_thing(self.fill_value),
             index=printing.pprint_thing(self.sp_index))
 
+    def _formatter(self, formatter=None):
+        if formatter is None:
+            def fmt(x):
+                if isna(x) and isinstance(x, float):
+                    return 'NaN'
+                return str(x)
+
+            return fmt
+        return formatter.formatter
+
 
 SparseArray._add_arithmetic_ops()
 SparseArray._add_comparison_ops()

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -66,7 +66,7 @@
 import pandas.core.missing as missing
 from pandas.core.base import PandasObject
 
-from pandas.core.arrays import Categorical
+from pandas.core.arrays import Categorical, ExtensionArray
 
 from pandas.core.indexes.datetimes import DatetimeIndex
 from pandas.core.indexes.timedeltas import TimedeltaIndex
@@ -1951,7 +1951,19 @@ def _slice(self, slicer):
         return self.values[slicer]
 
     def formatting_values(self):
-        return self.values._formatting_values()
+        # Deprecating the ability to override _formatting_values.
+        # Do the warning here, it's only user in pandas, since we
+        # have to check if the subclass overrode it.
+        fv = getattr(type(self.values), '_formatting_values', None)
+        if fv and fv != ExtensionArray._formatting_values:
+            msg = (
+                "'ExtensionArray._formatting_values' is deprecated. "
+                "Specify 'ExtensionArray._formatter' instead."
+            )
+            warnings.warn(msg, FutureWarning, stacklevel=10)
+            return self.values._formatting_values()
+
+        return self.values
 
     def concat_same_type(self, to_concat, placement=None):
         """

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -16,11 +16,12 @@
 from pandas.compat import StringIO, lzip, map, u, zip
 
 from pandas.core.dtypes.common import (
-    is_categorical_dtype, is_datetime64_dtype, is_datetimetz, is_float,
-    is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype,
-    is_list_like, is_numeric_dtype, is_period_arraylike, is_scalar,
+    is_categorical_dtype, is_datetime64_dtype, is_datetimetz,
+    is_extension_array_dtype, is_float, is_float_dtype, is_integer,
+    is_integer_dtype, is_list_like, is_numeric_dtype, is_scalar,
     is_timedelta64_dtype)
-from pandas.core.dtypes.generic import ABCMultiIndex, ABCSparseArray
+from pandas.core.dtypes.generic import (
+    ABCIndexClass, ABCMultiIndex, ABCSeries, ABCSparseArray)
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas import compat
@@ -29,7 +30,6 @@
 from pandas.core.config import get_option, set_option
 from pandas.core.index import Index, ensure_index
 from pandas.core.indexes.datetimes import DatetimeIndex
-from pandas.core.indexes.period import PeriodIndex
 
 from pandas.io.common import _expand_user, _stringify_path
 from pandas.io.formats.printing import adjoin, justify, pprint_thing
@@ -849,22 +849,18 @@ def _get_column_name_list(self):
 def format_array(values, formatter, float_format=None, na_rep='NaN',
                  digits=None, space=None, justify='right', decimal='.'):
 
-    if is_categorical_dtype(values):
-        fmt_klass = CategoricalArrayFormatter
-    elif is_interval_dtype(values):
-        fmt_klass = IntervalArrayFormatter
+    if is_datetime64_dtype(values.dtype):
+        fmt_klass = Datetime64Formatter
+    elif is_timedelta64_dtype(values.dtype):
+        fmt_klass = Timedelta64Formatter
+    elif is_extension_array_dtype(values.dtype):
+        fmt_klass = ExtensionArrayFormatter
     elif is_float_dtype(values.dtype):
         fmt_klass = FloatArrayFormatter
-    elif is_period_arraylike(values):
-        fmt_klass = PeriodArrayFormatter
     elif is_integer_dtype(values.dtype):
         fmt_klass = IntArrayFormatter
     elif is_datetimetz(values):
         fmt_klass = Datetime64TZFormatter
-    elif is_datetime64_dtype(values.dtype):
-        fmt_klass = Datetime64Formatter
-    elif is_timedelta64_dtype(values.dtype):
-        fmt_klass = Timedelta64Formatter
     else:
         fmt_klass = GenericArrayFormatter
 
@@ -1126,39 +1122,22 @@ def _format_strings(self):
         return fmt_values.tolist()
 
 
-class IntervalArrayFormatter(GenericArrayFormatter):
-
-    def __init__(self, values, *args, **kwargs):
-        GenericArrayFormatter.__init__(self, values, *args, **kwargs)
-
-    def _format_strings(self):
-        formatter = self.formatter or str
-        fmt_values = np.array([formatter(x) for x in self.values])
-        return fmt_values
-
-
-class PeriodArrayFormatter(IntArrayFormatter):
-
+class ExtensionArrayFormatter(GenericArrayFormatter):
     def _format_strings(self):
-        from pandas.core.indexes.period import IncompatibleFrequency
-        try:
-            values = PeriodIndex(self.values).to_native_types()
-        except IncompatibleFrequency:
-            # periods may contains different freq
-            values = Index(self.values, dtype='object').to_native_types()
-
-        formatter = self.formatter or (lambda x: '{x}'.format(x=x))
-        fmt_values = [formatter(x) for x in values]
-        return fmt_values
-
+        values = self.values
+        if isinstance(values, (ABCIndexClass, ABCSeries)):
+            values = values._values
 
-class CategoricalArrayFormatter(GenericArrayFormatter):
+        formatter = values._formatter(self)
 
-    def __init__(self, values, *args, **kwargs):
-        GenericArrayFormatter.__init__(self, values, *args, **kwargs)
+        if is_categorical_dtype(values.dtype):
+            # Categorical is special for now, so that we can preserve tzinfo
+            array = values.get_values()
+        else:
+            array = np.asarray(values)
 
-    def _format_strings(self):
-        fmt_values = format_array(self.values.get_values(), self.formatter,
+        fmt_values = format_array(array,
 if self.formatter is not None and callable(self.formatter): 
     return [self.formatter(x) for x in values] 
 if self.formatter is not None and callable(self.formatter): 
     return [self.formatter(x) for x in values] 
+                                  formatter,
                                   float_format=self.float_format,
                                   na_rep=self.na_rep, digits=self.digits,
                                   space=self.space, justify=self.justify)