Skip to content

Commit

Permalink
changed according to comments
Browse files Browse the repository at this point in the history
  • Loading branch information
topper-123 committed Aug 31, 2018
1 parent f92bb0d commit 906e0f7
Show file tree
Hide file tree
Showing 5 changed files with 235 additions and 80 deletions.
12 changes: 5 additions & 7 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -172,18 +172,16 @@ difficult to navigate.

Outputting of ``MultiIndex`` instances now outputs tuples of each row and ensures
that the tuple items are vertically aligned, so it's now much much easier to
understand the structure of the ``MultiIndex``. (:issue:`13480`):
understand the structure of the ``MultiIndex``. Also, the output gets
truncated if it's large. (:issue:`13480`):

.. ipython:: python

index1=range(1000)
index2 = pd.Index(['a'] * 500 + ['abc'] * 500)
pd.MultiIndex.from_arrays([index1, index2])
pd.MultiIndex.from_product([['a', 'abc'], range(500)])

For number of rows smaller than :attr:`options.display.max_seq_items`, all
values will be shown (default: 100 items). Horizontally, the output will
If the number of rows is smaller than :attr:`options.display.max_seq_items`,
all values will be shown (default: 100 items). Horizontally, the output will
be truncated if it's longer than :attr:`options.display.width` (default: 80 characters).
This solves the problem with outputting large MultiIndex instances to the console.


.. _whatsnew_0240.enhancements.other:
Expand Down
30 changes: 3 additions & 27 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
from pandas._libs import algos as libalgos, index as libindex, lib, Timestamp

from pandas.compat import range, zip, lrange, lzip, map, u
from pandas.compat import range, zip, lrange, lzip, map
from pandas.compat.numpy import function as nv
from pandas import compat

Expand Down Expand Up @@ -619,8 +619,7 @@ def _format_attrs(self):
"""
Return a list of tuples of the (attr,formatted_value)
"""
attrs = []
attrs.append(('dtype', "'{}'".format(self.dtype)))
attrs = [('dtype', "'{}'".format(self.dtype))]
if self.names is not None and any(self.names):
attrs.append(('names', default_pprint(self.names)))
max_seq_items = get_option('display.max_seq_items') or len(self)
Expand All @@ -636,30 +635,7 @@ def _format_data(self, name=None):
Return the formatted data as a unicode string
"""
return format_object_summary(self, self._formatter_func,
name=name, is_multi=True)

def __unicode__(self):
    """
    Build the unicode representation of this MultiIndex.

    Invoked by unicode(df) in py2 only. Yields a Unicode String in both
    py2/py3.

    The result has the form ``ClassName(<data><attr>=<value>,...)``, where
    the attribute pairs are separated by a comma followed by the spacing
    returned from ``_format_space``.
    """
    data = self._format_data()
    attrs = self._format_attrs()
    space = self._format_space()

    # no data provided, just attributes
    if data is None:
        data = ''

    attr_separator = u(",%s") % space
    rendered_attrs = attr_separator.join(
        [u("%s=%s") % (key, value) for key, value in attrs])

    return u("%s(%s%s)") % (self.__class__.__name__, data, rendered_attrs)
name=name, line_break_each_value=True)

def __len__(self):
    """Return the length of the first entry of ``self.labels``."""
    return len(self.labels[0])
Expand Down
77 changes: 53 additions & 24 deletions pandas/io/formats/printing.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ class TableSchemaFormatter(BaseFormatter):


def format_object_summary(obj, formatter, is_justify=True,
name=None, is_multi=False):
name=None, line_break_each_value=False):
"""
Return the formatted obj as a unicode string
Expand All @@ -283,8 +283,10 @@ def format_object_summary(obj, formatter, is_justify=True,
should justify the display
name : name, optional
defaults to the class name of the obj
is_multi : bool, default False
Is ``obj`` a :class:`MultiIndex` or not
line_break_each_value : bool, default False
If True, inserts a line break for each value of ``obj``.
If False, only break lines when a line of values gets wider
than the display width
Returns
-------
Expand All @@ -304,7 +306,11 @@ def format_object_summary(obj, formatter, is_justify=True,
space2 = "\n%s" % (' ' * (len(name) + 2))

n = len(obj)
sep = ',' if not is_multi else (',\n ' + ' ' * len(name))
if not line_break_each_value:
sep = ','
else:
# If we want to align on each value, we need a different separator.
sep = (',\n ' + ' ' * len(name))
max_seq_items = get_option('display.max_seq_items') or n

# are we a truncated display
Expand All @@ -330,10 +336,10 @@ def best_len(values):

if n == 0:
summary = '[], '
elif n == 1 and not is_multi:
elif n == 1 and not line_break_each_value:
first = formatter(obj[0])
summary = '[%s], ' % first
elif n == 2 and not is_multi:
elif n == 2 and not line_break_each_value:
first = formatter(obj[0])
last = formatter(obj[-1])
summary = '[%s, %s], ' % (first, last)
Expand All @@ -349,9 +355,15 @@ def best_len(values):

# adjust all values to max length if needed
if is_justify:
head, tail = _justify(head, tail, display_width, best_len,
is_truncated, is_multi)
if is_multi:
if line_break_each_value:
head, tail = _justify(head, tail)
elif (is_truncated or not (len(', '.join(head)) < display_width and
len(', '.join(tail)) < display_width)):
max_length = max(best_len(head), best_len(tail))
head = [x.rjust(max_length) for x in head]
tail = [x.rjust(max_length) for x in tail]

if line_break_each_value:
max_space = display_width - len(space2)
item = tail[0]
for i in reversed(range(1, len(item) + 1)):
Expand Down Expand Up @@ -384,7 +396,7 @@ def best_len(values):
summary += line
summary += '],'

if len(summary) > (display_width) or is_multi:
if len(summary) > (display_width) or line_break_each_value:
summary += space1
else: # one row
summary += ' '
Expand All @@ -395,23 +407,40 @@ def best_len(values):
return summary


def _justify(head, tail, display_width, best_len,
is_truncated=False, is_multi=False):
def _justify(head, tail):
"""
Justify each item in head and tail, so they align properly.
Justify each item in each list-like in head and tail, so each item
right-aligns when the two list-likes are stacked vertically.
Parameters
----------
head : list-like of list-likes of strings
tail : list-like of list-likes of strings
Returns
-------
head : list of tuples of strings
tail : list of tuples of strings
Examples
--------
>>> _justify([['a', 'b']], [['abc', 'abcd']])
([(' a', ' b')], [('abc', 'abcd')])
"""
if is_multi:
max_length = _max_level_item_length(head + tail)
head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
for seq in head]
tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
for seq in tail]
elif (is_truncated or not (len(', '.join(head)) < display_width and
len(', '.join(tail)) < display_width)):
max_length = max(best_len(head), best_len(tail))
head = [x.rjust(max_length) for x in head]
tail = [x.rjust(max_length) for x in tail]
combined = head + tail # type: List[str]

# For each position for the sequences in ``combined``,
# find the length of the largest string.
max_length = [0] * len(combined[0]) # type: List[int]
for inner_seq in combined:
length = [len(item) for item in inner_seq]
max_length = [max(x, y) for x, y in zip(max_length, length)]

# justify each item in each list-like in head and tail using max_length
head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
for seq in head]
tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
for seq in tail]
return head, tail


Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/indexes/multi/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import numpy as np
import pytest

import pandas as pd
from pandas import Index, MultiIndex


Expand Down Expand Up @@ -53,3 +55,28 @@ def holder():
def compat_props():
    """
    Return the names of the properties a MultiIndex must have
    associated with it.
    """
    required_properties = ['shape', 'ndim', 'size']
    return required_properties


@pytest.fixture
def narrow_multi_index():
    """
    Return a MultiIndex that is narrower than the display (<80 characters).

    The index has 2000 rows and three levels named ``a``, ``b`` and ``dti``:
    a categorical of ``'a'``/``'abc'`` values, that categorical's codes
    shifted by 9, and a second-frequency date range starting at 2000-01-01.
    """
    n = 1000
    categories = pd.CategoricalIndex(['a'] * n + ['abc'] * n)
    seconds = pd.date_range('2000-01-01', freq='s', periods=n * 2)
    arrays = [categories, categories.codes + 9, seconds]
    return pd.MultiIndex.from_arrays(arrays, names=['a', 'b', 'dti'])


@pytest.fixture
def wide_multi_index():
    """
    Return a MultiIndex that is wider than the display (>80 characters).

    The index has 2000 rows and five levels: a categorical of
    ``'a'``/``'abc'`` values, that categorical's codes shifted by 9, and
    the same second-frequency date range repeated three times.
    """
    n = 1000
    categories = pd.CategoricalIndex(['a'] * n + ['abc'] * n)
    seconds = pd.date_range('2000-01-01', freq='s', periods=n * 2)

    # Pair each level name with its array so the two stay in step.
    named_levels = [
        ('a', categories),
        ('b', categories.codes + 9),
        ('dti_1', seconds),
        ('dti_2', seconds),
        ('dti_3', seconds),
    ]
    names = [name for name, _ in named_levels]
    levels = [values for _, values in named_levels]
    return pd.MultiIndex.from_arrays(levels, names=names)
Loading

0 comments on commit 906e0f7

Please sign in to comment.