Skip to content

Commit

Permalink
ENH: better MultiIndex.__repr__
Browse files Browse the repository at this point in the history
  • Loading branch information
topper-123 committed Aug 26, 2018
1 parent 9f6c02d commit dd81bdd
Show file tree
Hide file tree
Showing 5 changed files with 272 additions and 74 deletions.
26 changes: 26 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,32 @@ This is the same behavior as ``Series.values`` for categorical data. See
:ref:`whatsnew_0240.api_breaking.interval_values` for more.


.. _whatsnew_0240.enhancements.multi_index_repr:

Better repr for MultiIndex
^^^^^^^^^^^^^^^^^^^^^^^^^^

Previously, outputting a :class:`MultiIndex` printed the levels/labels of the
multiindex. This was visually unappealing and made it difficult to understand
the structure of the MultiIndex. Also, this could be a problem for large
indices as the output could be slow to print and make the console output
difficult to navigate.

The repr of a ``MultiIndex`` now shows each row as a tuple and ensures that
the tuple items are vertically aligned, so it is now much easier to
understand the structure of the ``MultiIndex`` (:issue:`13480`):

.. ipython:: python

index1=range(1000)
index2 = pd.Index(['a'] * 500 + ['abc'] * 500)
pd.MultiIndex.from_arrays([index1, index2])

When the number of rows is smaller than :attr:`options.display.max_seq_items`
(default 100), all values will be shown. Horizontally, the output will be
truncated if it is longer than :attr:`options.display.width`.


.. _whatsnew_0240.enhancements.other:

Other Enhancements
Expand Down
60 changes: 46 additions & 14 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
from pandas._libs import algos as libalgos, index as libindex, lib, Timestamp

from pandas.compat import range, zip, lrange, lzip, map
from pandas.compat import range, zip, lrange, lzip, map, u
from pandas.compat.numpy import function as nv
from pandas import compat

Expand All @@ -31,7 +31,8 @@
import pandas.core.common as com
import pandas.core.missing as missing
import pandas.core.algorithms as algos
from pandas.io.formats.printing import pprint_thing
from pandas.io.formats.printing import (format_object_summary,
default_pprint, pprint_thing)

from pandas.core.config import get_option

Expand Down Expand Up @@ -607,27 +608,58 @@ def _nbytes(self, deep=False):
result += self._engine.sizeof(deep=deep)
return result

def _formatter_func(self, tup):
"""
Formats each item in tup according to its level's formatter function.
"""
formatter_funcs = [level._formatter_func for level in self.levels]
return tuple(func(val) for func, val in zip(formatter_funcs, tup))

def _format_attrs(self):
"""
Return a list of tuples of the (attr,formatted_value)
"""
attrs = [
('levels', ibase.default_pprint(self._levels,
max_seq_items=False)),
('labels', ibase.default_pprint(self._labels,
max_seq_items=False))]
if com._any_not_none(*self.names):
attrs.append(('names', ibase.default_pprint(self.names)))
if self.sortorder is not None:
attrs.append(('sortorder', ibase.default_pprint(self.sortorder)))
attrs = []
attrs.append(('dtype', "'{}'".format(self.dtype)))
if self.names is not None and any(self.names):
attrs.append(('names', default_pprint(self.names)))
max_seq_items = get_option('display.max_seq_items') or len(self)
if len(self) > max_seq_items:
attrs.append(('length', len(self)))
return attrs

def _format_space(self):
return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
return " "

def _format_data(self, name=None):
# we are formatting thru the attributes
return None
"""
Return the formatted data as a unicode string
"""
return format_object_summary(self, self._formatter_func,
name=name, is_multi=True)

def __unicode__(self):
    """
    Return a string representation for this MultiIndex.

    Invoked by unicode(df) in py2 only. Yields a Unicode String in both
    py2/py3.
    """
    klass = self.__class__.__name__
    data = self._format_data()
    attrs = self._format_attrs()

    # Each ``attr=value`` pair is separated by a comma followed by
    # whatever ``_format_space`` returns.
    separator = u(",%s") % self._format_space()
    attr_parts = (u("%s=%s") % (key, value) for key, value in attrs)
    prepr = separator.join(attr_parts)

    # no data provided, just attributes
    if data is None:
        data = ''

    return u("%s(%s%s)") % (klass, data, prepr)

def __len__(self):
return len(self.labels[0])
Expand Down
80 changes: 65 additions & 15 deletions pandas/io/formats/printing.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,8 @@ class TableSchemaFormatter(BaseFormatter):
max_seq_items=max_seq_items)


def format_object_summary(obj, formatter, is_justify=True, name=None):
def format_object_summary(obj, formatter, is_justify=True,
name=None, is_multi=False):
"""
Return the formatted obj as a unicode string
Expand All @@ -280,8 +281,10 @@ def format_object_summary(obj, formatter, is_justify=True, name=None):
string formatter for an element
is_justify : boolean
should justify the display
name : name, optiona
name : name, optional
defaults to the class name of the obj
is_multi : bool, default False
Is ``obj`` a :class:`MultiIndex` or not
Returns
-------
Expand All @@ -301,7 +304,7 @@ def format_object_summary(obj, formatter, is_justify=True, name=None):
space2 = "\n%s" % (' ' * (len(name) + 2))

n = len(obj)
sep = ','
sep = ',' if not is_multi else (',\n ' + ' ' * len(name))
max_seq_items = get_option('display.max_seq_items') or n

# are we a truncated display
Expand All @@ -327,10 +330,10 @@ def best_len(values):

if n == 0:
summary = '[], '
elif n == 1:
elif n == 1 and not is_multi:
first = formatter(obj[0])
summary = '[%s], ' % first
elif n == 2:
elif n == 2 and not is_multi:
first = formatter(obj[0])
last = formatter(obj[-1])
summary = '[%s, %s], ' % (first, last)
Expand All @@ -346,15 +349,16 @@ def best_len(values):

# adjust all values to max length if needed
if is_justify:

# however, if we are not truncated and we are only a single
# line, then don't justify
if (is_truncated or
not (len(', '.join(head)) < display_width and
len(', '.join(tail)) < display_width)):
max_len = max(best_len(head), best_len(tail))
head = [x.rjust(max_len) for x in head]
tail = [x.rjust(max_len) for x in tail]
head, tail = _justify(head, tail, display_width, best_len,
is_truncated, is_multi)
if is_multi:
max_space = display_width - len(space2)
item = tail[0]
for i in reversed(range(1, len(item) + 1)):
if len(_pprint_seq(item, max_seq_items=i)) < max_space:
break
head = [_pprint_seq(x, max_seq_items=i) for x in head]
tail = [_pprint_seq(x, max_seq_items=i) for x in tail]

summary = ""
line = space2
Expand All @@ -380,7 +384,7 @@ def best_len(values):
summary += line
summary += '],'

if len(summary) > (display_width):
if len(summary) > (display_width) or is_multi:
summary += space1
else: # one row
summary += ' '
Expand All @@ -391,6 +395,52 @@ def best_len(values):
return summary


def _justify(head, tail, display_width, best_len,
is_truncated=False, is_multi=False):
"""
Justify each item in head and tail, so they align properly.
"""
if is_multi:
max_length = _max_level_item_length(head + tail)
head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
for seq in head]
tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
for seq in tail]
elif (is_truncated or not (len(', '.join(head)) < display_width and
len(', '.join(tail)) < display_width)):
max_length = max(best_len(head), best_len(tail))
head = [x.rjust(max_length) for x in head]
tail = [x.rjust(max_length) for x in tail]

return head, tail


def _max_level_item_length(seq):
"""
For each position for the sequences in ``seq``, find the largest length.
Used for justifying individual values in a :class:`pandas.MultiIndex`.
Parameters
----------
seq : list-like of list-likes of strings
Returns
-------
max_length : list of ints
Examples
--------
>>> _max_level_item_length([['s', 'ab'], ['abc', 'a']])
[3, 2]
"""
max_length = [0] * len(seq[0])
for inner_seq in seq:
length = [len(item) for item in inner_seq]
max_length = [max(x, y) for x, y in zip(max_length, length)]
return max_length


def format_object_attrs(obj):
"""
Return a list of tuples of the (attr, formatted_value)
Expand Down
Loading

0 comments on commit dd81bdd

Please sign in to comment.