diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 6a65e5e6f56a22..ee18351602fa99 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -172,18 +172,16 @@ difficult to navigate. Outputting of ``MultiIndex`` instances now outputs tuples of each row and ensures that the tuple items are vertically aligned, so it's now much much easier to -understand the structure of the ``MultiIndex``. (:issue:`13480`): +understand the structure of the ``MultiIndex``. Also, the output gets +truncated if it's large. (:issue:`13480`): .. ipython:: python - index1=range(1000) - index2 = pd.Index(['a'] * 500 + ['abc'] * 500) - pd.MultiIndex.from_arrays([index1, index2]) + pd.MultiIndex.from_product([['a', 'abc'], range(500)]) -For number of rows smaller than :attr:`options.display.max_seq_items`, all -values will be shown (default: 100 items). Horizontally, the output will +If the number of rows is smaller than :attr:`options.display.max_seq_items`, +all values will be shown (default: 100 items). Horizontally, the output will truncate, if it's longer than :attr:`options.display.width` (default: 80 characters). -This solves the problem with outputting large MultiIndex instances to the console. .. 
_whatsnew_0240.enhancements.other: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index bf540f49ffd1c4..c3492300e5d5d2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -7,7 +7,7 @@ import numpy as np from pandas._libs import algos as libalgos, index as libindex, lib, Timestamp -from pandas.compat import range, zip, lrange, lzip, map, u +from pandas.compat import range, zip, lrange, lzip, map from pandas.compat.numpy import function as nv from pandas import compat @@ -619,8 +619,7 @@ def _format_attrs(self): """ Return a list of tuples of the (attr,formatted_value) """ - attrs = [] - attrs.append(('dtype', "'{}'".format(self.dtype))) + attrs = [('dtype', "'{}'".format(self.dtype))] if self.names is not None and any(self.names): attrs.append(('names', default_pprint(self.names))) max_seq_items = get_option('display.max_seq_items') or len(self) @@ -636,30 +635,7 @@ def _format_data(self, name=None): Return the formatted data as a unicode string """ return format_object_summary(self, self._formatter_func, - name=name, is_multi=True) - - def __unicode__(self): - """ - Return a string representation for this MultiIndex. - - Invoked by unicode(df) in py2 only. Yields a Unicode String in both - py2/py3. 
- """ - klass = self.__class__.__name__ - data = self._format_data() - attrs = self._format_attrs() - space = self._format_space() - - prepr = (u(",%s") % - space).join(u("%s=%s") % (k, v) for k, v in attrs) - - # no data provided, just attributes - if data is None: - data = '' - - res = u("%s(%s%s)") % (klass, data, prepr) - - return res + name=name, line_break_each_value=True) def __len__(self): return len(self.labels[0]) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 7f0ba90afb8774..975ce6f1005f28 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -269,7 +269,7 @@ class TableSchemaFormatter(BaseFormatter): def format_object_summary(obj, formatter, is_justify=True, - name=None, is_multi=False): + name=None, line_break_each_value=False): """ Return the formatted obj as a unicode string @@ -283,8 +283,10 @@ def format_object_summary(obj, formatter, is_justify=True, should justify the display name : name, optional defaults to the class name of the obj - is_multi : bool, default False - Is ``obj`` a :class:`MultiIndex` or not + line_break_each_value : bool, default False + If True, inserts a line break for each value of ``obj``. + If False, only break lines when a line of values gets wider + than the display width. Returns ------- @@ -304,7 +306,11 @@ def format_object_summary(obj, formatter, is_justify=True, space2 = "\n%s" % (' ' * (len(name) + 2)) n = len(obj) - sep = ',' if not is_multi else (',\n ' + ' ' * len(name)) + if not line_break_each_value: + sep = ',' + else: + # If we want to align on each value, we need a different separator. 
+ sep = (',\n ' + ' ' * len(name)) max_seq_items = get_option('display.max_seq_items') or n # are we a truncated display @@ -330,10 +336,10 @@ def best_len(values): if n == 0: summary = '[], ' - elif n == 1 and not is_multi: + elif n == 1 and not line_break_each_value: first = formatter(obj[0]) summary = '[%s], ' % first - elif n == 2 and not is_multi: + elif n == 2 and not line_break_each_value: first = formatter(obj[0]) last = formatter(obj[-1]) summary = '[%s, %s], ' % (first, last) @@ -349,9 +355,15 @@ def best_len(values): # adjust all values to max length if needed if is_justify: - head, tail = _justify(head, tail, display_width, best_len, - is_truncated, is_multi) - if is_multi: + if line_break_each_value: + head, tail = _justify(head, tail) + elif (is_truncated or not (len(', '.join(head)) < display_width and + len(', '.join(tail)) < display_width)): + max_length = max(best_len(head), best_len(tail)) + head = [x.rjust(max_length) for x in head] + tail = [x.rjust(max_length) for x in tail] + + if line_break_each_value: max_space = display_width - len(space2) item = tail[0] for i in reversed(range(1, len(item) + 1)): @@ -384,7 +396,7 @@ def best_len(values): summary += line summary += '],' - if len(summary) > (display_width) or is_multi: + if len(summary) > (display_width) or line_break_each_value: summary += space1 else: # one row summary += ' ' @@ -395,23 +407,40 @@ def best_len(values): return summary -def _justify(head, tail, display_width, best_len, - is_truncated=False, is_multi=False): +def _justify(head, tail): """ - Justify each item in head and tail, so they align properly. + Justify each item in each list-like in head and tail, so each item + right-aligns when the two list-likes are stacked vertically. 
+ + Parameters + ---------- + head : list-like of list-likes of strings + tail : list-like of list-likes of strings + + Returns + ------- + head : list of tuples of strings + tail : list of tuples of strings + + Examples + -------- + >>> _justify([['a', 'b']], [['abc', 'abcd']]) + ([(' a', ' b')], [('abc', 'abcd')]) """ - if is_multi: - max_length = _max_level_item_length(head + tail) - head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) - for seq in head] - tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) - for seq in tail] - elif (is_truncated or not (len(', '.join(head)) < display_width and - len(', '.join(tail)) < display_width)): - max_length = max(best_len(head), best_len(tail)) - head = [x.rjust(max_length) for x in head] - tail = [x.rjust(max_length) for x in tail] + combined = head + tail # type: List[str] + + # For each position for the sequences in ``combined``, + # find the length of the largest string. + max_length = [0] * len(combined[0]) # type: List[int] + for inner_seq in combined: + length = [len(item) for item in inner_seq] + max_length = [max(x, y) for x, y in zip(max_length, length)] + # justify each item in each list-like in head and tail using max_length + head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) + for seq in head] + tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) + for seq in tail] return head, tail diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index afe651d22c6a76..55f9ae589ec3a2 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -2,6 +2,8 @@ import numpy as np import pytest + +import pandas as pd from pandas import Index, MultiIndex @@ -53,3 +55,28 @@ def holder(): def compat_props(): # a MultiIndex must have these properties associated with it return ['shape', 'ndim', 'size'] + + +@pytest.fixture +def narrow_multi_index(): + """ + Return a MultiIndex 
that is less wide than the display (<80 characters). + """ + n = 1000 + ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n)) + dti = pd.date_range('2000-01-01', freq='s', periods=n * 2) + return pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti], + names=['a', 'b', 'dti']) + + +@pytest.fixture +def wide_multi_index(): + """ + Return a MultiIndex that is wider than the display (>80 characters). + """ + n = 1000 + ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n)) + dti = pd.date_range('2000-01-01', freq='s', periods=n * 2) + levels = [ci, ci.codes + 9, dti, dti, dti] + names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3'] + return pd.MultiIndex.from_arrays(levels, names=names) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 6daece47bd7852..8af5400d86eca0 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -59,6 +59,13 @@ def test_repr_with_unicode_data(): assert "\\u" not in repr(index) # we don't want unicode-escaped +def test_repr_roundtrip_raises(): + mi = MultiIndex.from_product([list('ab'), range(3)], + names=['first', 'second']) + with pytest.raises(TypeError): + eval(repr(mi)) + + def test_unicode_string_with_unicode(): d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index @@ -90,25 +97,16 @@ def test_repr_max_seq_item_setting(idx): @pytest.mark.skipif(PY2, reason="repr output is different for python2") class TestRepr(object): - def setup_class(self): - n = 1000 - ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n)) - dti = pd.date_range('2000-01-01', freq='s', periods=n * 2) - self.narrow_mi = pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti], - names=['a', 'b', 'dti']) - - levels = [ci, ci.codes + 9, dti, dti, dti] - names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3'] - self.wide_mi = pd.MultiIndex.from_arrays(levels, names=names) - def test_repr(self, idx): result = idx[:1].__repr__() - 
expected = """MultiIndex([('foo', 'one')], + expected = """\ +MultiIndex([('foo', 'one')], dtype='object', names=['first', 'second'])""" assert result == expected result = idx.__repr__() - expected = """MultiIndex([('foo', 'one'), + expected = """\ +MultiIndex([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('baz', 'two'), @@ -119,7 +117,8 @@ def test_repr(self, idx): with pd.option_context('display.max_seq_items', 5): result = idx.__repr__() - expected = """MultiIndex([('foo', 'one'), + expected = """\ +MultiIndex([('foo', 'one'), ('foo', 'two'), ... ('qux', 'one'), @@ -127,14 +126,15 @@ def test_repr(self, idx): dtype='object', names=['first', 'second'], length=6)""" assert result == expected - def test_rjust(self): - result = self.narrow_mi[:1].__repr__() + def test_rjust(self, narrow_multi_index): + mi = narrow_multi_index + result = mi[:1].__repr__() expected = """\ MultiIndex([('a', 9, '2000-01-01 00:00:00')], dtype='object', names=['a', 'b', 'dti'])""" assert result == expected - result = self.narrow_mi[::500].__repr__() + result = mi[::500].__repr__() expected = """\ MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), ( 'a', 9, '2000-01-01 00:08:20'), @@ -143,7 +143,7 @@ def test_rjust(self): dtype='object', names=['a', 'b', 'dti'])""" assert result == expected - result = self.narrow_mi.__repr__() + result = mi.__repr__() expected = """\ MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), ( 'a', 9, '2000-01-01 00:00:01'), @@ -169,13 +169,14 @@ def test_rjust(self): dtype='object', names=['a', 'b', 'dti'], length=2000)""" assert result == expected - def test_tuple_width(self): - result = self.wide_mi[:1].__repr__() + def test_tuple_width(self, wide_multi_index): + mi = wide_multi_index + result = mi[:1].__repr__() expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" assert result == expected - result = self.wide_mi[:10].__repr__() + result = mi[:10].__repr__() expected = 
"""\ MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), @@ -190,7 +191,7 @@ def test_tuple_width(self): dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" assert result == expected - result = self.wide_mi.__repr__() + result = mi.__repr__() expected = """\ MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), ( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), @@ -215,3 +216,127 @@ def test_tuple_width(self): ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" # noqa assert result == expected + + +@pytest.mark.skipif(not PY2, reason="repr output is different for python2") +class TestReprPy2(object): + + def test_repr(self, idx): + result = idx[:1].__repr__() + expected = """\ +MultiIndex([(u'foo', u'one')], + dtype='object', names=[u'first', u'second'])""" + assert result == expected + + result = idx.__repr__() + expected = """\ +MultiIndex([(u'foo', u'one'), + (u'foo', u'two'), + (u'bar', u'one'), + (u'baz', u'two'), + (u'qux', u'one'), + (u'qux', u'two')], + dtype='object', names=[u'first', u'second'])""" + assert result == expected + + with pd.option_context('display.max_seq_items', 5): + result = idx.__repr__() + expected = """\ +MultiIndex([(u'foo', u'one'), + (u'foo', u'two'), + ... 
+ (u'qux', u'one'), + (u'qux', u'two')], + dtype='object', names=[u'first', u'second'], length=6)""" + assert result == expected + + def test_rjust(self, narrow_multi_index): + mi = narrow_multi_index + result = mi[:1].__repr__() + expected = """\ +MultiIndex([(u'a', 9, '2000-01-01 00:00:00')], + dtype='object', names=[u'a', u'b', u'dti'])""" + assert result == expected + + result = mi[::500].__repr__() + expected = """\ +MultiIndex([( u'a', 9, '2000-01-01 00:00:00'), + ( u'a', 9, '2000-01-01 00:08:20'), + (u'abc', 10, '2000-01-01 00:16:40'), + (u'abc', 10, '2000-01-01 00:25:00')], + dtype='object', names=[u'a', u'b', u'dti'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( u'a', 9, '2000-01-01 00:00:00'), + ( u'a', 9, '2000-01-01 00:00:01'), + ( u'a', 9, '2000-01-01 00:00:02'), + ( u'a', 9, '2000-01-01 00:00:03'), + ( u'a', 9, '2000-01-01 00:00:04'), + ( u'a', 9, '2000-01-01 00:00:05'), + ( u'a', 9, '2000-01-01 00:00:06'), + ( u'a', 9, '2000-01-01 00:00:07'), + ( u'a', 9, '2000-01-01 00:00:08'), + ( u'a', 9, '2000-01-01 00:00:09'), + ... 
+ (u'abc', 10, '2000-01-01 00:33:10'), + (u'abc', 10, '2000-01-01 00:33:11'), + (u'abc', 10, '2000-01-01 00:33:12'), + (u'abc', 10, '2000-01-01 00:33:13'), + (u'abc', 10, '2000-01-01 00:33:14'), + (u'abc', 10, '2000-01-01 00:33:15'), + (u'abc', 10, '2000-01-01 00:33:16'), + (u'abc', 10, '2000-01-01 00:33:17'), + (u'abc', 10, '2000-01-01 00:33:18'), + (u'abc', 10, '2000-01-01 00:33:19')], + dtype='object', names=[u'a', u'b', u'dti'], length=2000)""" + assert result == expected + + def test_tuple_width(self, wide_multi_index): + mi = wide_multi_index + result = mi[:1].__repr__() + expected = """MultiIndex([(u'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], + dtype='object', names=[u'a', u'b', u'dti_1', u'dti_2', u'dti_3'])""" + assert result == expected + + result = mi[:10].__repr__() + expected = """\ +MultiIndex([(u'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + (u'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + (u'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + (u'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + (u'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + (u'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + (u'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + (u'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + (u'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + (u'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], + dtype='object', names=[u'a', u'b', u'dti_1', u'dti_2', u'dti_3'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( u'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ( u'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ( u'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ( u'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ( u'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ( u'a', 9, '2000-01-01 00:00:05', 
'2000-01-01 00:00:05', ...), + ( u'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + ( u'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ( u'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ( u'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...), + ... + (u'abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...), + (u'abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...), + (u'abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...), + (u'abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...), + (u'abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...), + (u'abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...), + (u'abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...), + (u'abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), + (u'abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), + (u'abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], + dtype='object', names=[u'a', u'b', u'dti_1', u'dti_2', u'dti_3'], length=2000)""" # noqa + assert result == expected