changed according to comments

pandas-dev · Aug 31, 2018 · 906e0f7 · 906e0f7
1 parent f92bb0d
commit 906e0f7
Show file tree

Hide file tree

Showing 5 changed files with 235 additions and 80 deletions.
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -172,18 +172,16 @@ difficult to navigate.
 
 Outputting of ``MultiIndex`` instances now outputs tuples of each row and ensures
 that the tuple items are vertically aligned, so it's now much much easier to
-understand the structure of the ``MultiIndex``. (:issue:`13480`):
+understand the structure of the ``MultiIndex``. Also, the output gets
+truncated if it's large. (:issue:`13480`):
 
 .. ipython:: python
 
-   index1=range(1000)
-   index2 = pd.Index(['a'] * 500 + ['abc'] * 500)
-   pd.MultiIndex.from_arrays([index1, index2])
+   pd.MultiIndex.from_product([['a', 'abc'], range(500)])
 
-For number of rows smaller than :attr:`options.display.max_seq_items`, all
-values will be shown (default: 100 items). Horizontally, the output will
+If the number of rows is smaller than :attr:`options.display.max_seq_items`,
+all values will be shown (default: 100 items). Horizontally, the output will
 truncate, if it's longer than :attr:`options.display.width` (default: 80 characters).
-This solves the problem with outputting large MultiIndex instances to the console.
 
 
 .. _whatsnew_0240.enhancements.other:

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -7,7 +7,7 @@
 import numpy as np
 from pandas._libs import algos as libalgos, index as libindex, lib, Timestamp
 
-from pandas.compat import range, zip, lrange, lzip, map, u
+from pandas.compat import range, zip, lrange, lzip, map
 from pandas.compat.numpy import function as nv
 from pandas import compat
 
@@ -619,8 +619,7 @@ def _format_attrs(self):
         """
         Return a list of tuples of the (attr,formatted_value)
         """
-        attrs = []
-        attrs.append(('dtype', "'{}'".format(self.dtype)))
+        attrs = [('dtype', "'{}'".format(self.dtype))]
         if self.names is not None and any(self.names):
             attrs.append(('names', default_pprint(self.names)))
         max_seq_items = get_option('display.max_seq_items') or len(self)
@@ -636,30 +635,7 @@ def _format_data(self, name=None):
         Return the formatted data as a unicode string
         """
         return format_object_summary(self, self._formatter_func,
-                                     name=name, is_multi=True)
-
-    def __unicode__(self):
-        """
-        Return a string representation for this MultiIndex.
-
-        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
-        py2/py3.
-        """
-        klass = self.__class__.__name__
-        data = self._format_data()
-        attrs = self._format_attrs()
-        space = self._format_space()
-
-        prepr = (u(",%s") %
-                 space).join(u("%s=%s") % (k, v) for k, v in attrs)
-
-        # no data provided, just attributes
-        if data is None:
-            data = ''
-
-        res = u("%s(%s%s)") % (klass, data, prepr)
-
-        return res
+                                     name=name, line_break_each_value=True)
 
     def __len__(self):
         return len(self.labels[0])

diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py
@@ -269,7 +269,7 @@ class TableSchemaFormatter(BaseFormatter):
 
 
 def format_object_summary(obj, formatter, is_justify=True,
-                          name=None, is_multi=False):
+                          name=None, line_break_each_value=False):
     """
     Return the formatted obj as a unicode string
 
@@ -283,8 +283,10 @@ def format_object_summary(obj, formatter, is_justify=True,
         should justify the display
     name : name, optional
         defaults to the class name of the obj
-    is_multi : bool, default False
-        Is ``obj`` a :class:`MultiIndex` or not
+    line_break_each_value : bool, default False
+        If True, inserts a line break for each value of ``obj``.
+        If False, only break lines when a line of values gets wider
+        than the display width
 
     Returns
     -------
@@ -304,7 +306,11 @@ def format_object_summary(obj, formatter, is_justify=True,
     space2 = "\n%s" % (' ' * (len(name) + 2))
 
     n = len(obj)
-    sep = ',' if not is_multi else (',\n ' + ' ' * len(name))
+    if not line_break_each_value:
+        sep = ','
+    else:
+        # If we want to align on each value, we need a different separator.
+        sep = (',\n ' + ' ' * len(name))
     max_seq_items = get_option('display.max_seq_items') or n
 
     # are we a truncated display
@@ -330,10 +336,10 @@ def best_len(values):
 
     if n == 0:
         summary = '[], '
-    elif n == 1 and not is_multi:
+    elif n == 1 and not line_break_each_value:
         first = formatter(obj[0])
         summary = '[%s], ' % first
-    elif n == 2 and not is_multi:
+    elif n == 2 and not line_break_each_value:
         first = formatter(obj[0])
         last = formatter(obj[-1])
         summary = '[%s, %s], ' % (first, last)
@@ -349,9 +355,15 @@ def best_len(values):
 
         # adjust all values to max length if needed
         if is_justify:
-            head, tail = _justify(head, tail, display_width, best_len,
-                                  is_truncated, is_multi)
-        if is_multi:
+            if line_break_each_value:
+                head, tail = _justify(head, tail)
+            elif (is_truncated or not (len(', '.join(head)) < display_width and
+                                       len(', '.join(tail)) < display_width)):
+                max_length = max(best_len(head), best_len(tail))
+                head = [x.rjust(max_length) for x in head]
+                tail = [x.rjust(max_length) for x in tail]
+
+        if line_break_each_value:
             max_space = display_width - len(space2)
             item = tail[0]
             for i in reversed(range(1, len(item) + 1)):
@@ -384,7 +396,7 @@ def best_len(values):
         summary += line
         summary += '],'
 
-        if len(summary) > (display_width) or is_multi:
+        if len(summary) > (display_width) or line_break_each_value:
             summary += space1
         else:  # one row
             summary += ' '
@@ -395,23 +407,40 @@ def best_len(values):
     return summary
 
 
-def _justify(head, tail, display_width, best_len,
-             is_truncated=False, is_multi=False):
+def _justify(head, tail):
     """
-    Justify each item in head and tail, so they align properly.
+    Justify each item in each list-like in head and tail, so each item
+    right-aligns when the two list-likes are stacked vertically.
+
+    Parameters
+    ----------
+    head : list-like of list-likes of strings
+    tail : list-like of list-likes of strings
+
+    Returns
+    -------
+    head : list of tuples of strings
+    tail : list of tuples of strings
+
+    Examples
+    --------
+    >>> _justify([['a', 'b']], [['abc', 'abcd']])
+    ([('  a', '   b')], [('abc', 'abcd')])
     """
-    if is_multi:
-        max_length = _max_level_item_length(head + tail)
-        head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
-                for seq in head]
-        tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
-                for seq in tail]
-    elif (is_truncated or not (len(', '.join(head)) < display_width and
-                               len(', '.join(tail)) < display_width)):
-        max_length = max(best_len(head), best_len(tail))
-        head = [x.rjust(max_length) for x in head]
-        tail = [x.rjust(max_length) for x in tail]
+    combined = head + tail  # type: List[str]
+
+    # For each position for the sequences in ``combined``,
+    # find the length of the largest string.
+    max_length = [0] * len(combined[0])  # type: List[int]
+    for inner_seq in combined:
+        length = [len(item) for item in inner_seq]
+        max_length = [max(x, y) for x, y in zip(max_length, length)]
 
+    # justify each item in each list-like in head and tail using max_length
+    head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
+            for seq in head]
+    tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
+            for seq in tail]
     return head, tail
 
 

diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py
@@ -2,6 +2,8 @@
 
 import numpy as np
 import pytest
+
+import pandas as pd
 from pandas import Index, MultiIndex
 
 
@@ -53,3 +55,28 @@ def holder():
 def compat_props():
     # a MultiIndex must have these properties associated with it
     return ['shape', 'ndim', 'size']
+
+
+@pytest.fixture
+def narrow_multi_index():
+    """
+    Return a MultiIndex that is less wide than the display (<80 characters).
+    """
+    n = 1000
+    ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n))
+    dti = pd.date_range('2000-01-01', freq='s', periods=n * 2)
+    return pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti],
+                                     names=['a', 'b', 'dti'])
+
+
+@pytest.fixture
+def wide_multi_index():
+    """
+    Return a MultiIndex that is wider than the display (>80 characters).
+    """
+    n = 1000
+    ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n))
+    dti = pd.date_range('2000-01-01', freq='s', periods=n * 2)
+    levels = [ci, ci.codes + 9, dti, dti, dti]
+    names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3']
+    return pd.MultiIndex.from_arrays(levels, names=names)