Skip to content

Commit

Permalink
Make DataFrame.to_string output full content by default (pandas-dev#2…
Browse files Browse the repository at this point in the history
  • Loading branch information
Luke Shepard authored and proost committed Dec 19, 2019
1 parent 364e8af commit 357c8a3
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 26 deletions.
2 changes: 1 addition & 1 deletion doc/source/user_guide/options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ display.max_colwidth 50 The maximum width in charac
a column in the repr of a pandas
data structure. When the column overflows,
a "..." placeholder is embedded in
the output.
the output. A 'None' value means unlimited.
display.max_info_columns 100 max_info_columns is used in DataFrame.info
method to decide if per column information
will be printed.
Expand Down
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ including other versions of pandas.
Enhancements
~~~~~~~~~~~~

- :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`)
-

.. _whatsnew_1000.enhancements.other:

Expand Down Expand Up @@ -191,6 +193,7 @@ I/O
- Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`)
- Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`)
- Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. (:issue:`25099`)
- Bug in :meth:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`)

Plotting
^^^^^^^^
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,10 @@ def use_numexpr_cb(key):
"""

max_colwidth_doc = """
: int
: int or None
The maximum width in characters of a column in the repr of
a pandas data structure. When the column overflows, a "..."
placeholder is embedded in the output.
placeholder is embedded in the output. A 'None' value means unlimited.
"""

colheader_justify_doc = """
Expand Down Expand Up @@ -340,7 +340,9 @@ def is_terminal():
validator=is_instance_factory([type(None), int]),
)
cf.register_option("max_categories", 8, pc_max_categories_doc, validator=is_int)
cf.register_option("max_colwidth", 50, max_colwidth_doc, validator=is_int)
cf.register_option(
"max_colwidth", 50, max_colwidth_doc, validator=is_nonnegative_int
)
if is_terminal():
max_cols = 0 # automatically determine optimal number of columns
else:
Expand Down
50 changes: 30 additions & 20 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,7 @@ def __repr__(self):
max_rows = get_option("display.max_rows")
min_rows = get_option("display.min_rows")
max_cols = get_option("display.max_columns")
max_colwidth = get_option("display.max_colwidth")
show_dimensions = get_option("display.show_dimensions")
if get_option("display.expand_frame_repr"):
width, _ = console.get_console_size()
Expand All @@ -652,6 +653,7 @@ def __repr__(self):
min_rows=min_rows,
max_cols=max_cols,
line_width=width,
max_colwidth=max_colwidth,
show_dimensions=show_dimensions,
)

Expand Down Expand Up @@ -730,12 +732,17 @@ def to_string(
show_dimensions=False,
decimal=".",
line_width=None,
max_colwidth=None,
):
"""
Render a DataFrame to a console-friendly tabular output.
%(shared_params)s
line_width : int, optional
Width to wrap a line in characters.
max_colwidth : int, optional
Max width to truncate each column in characters. By default, no limit.
.. versionadded:: 1.0.0
%(returns)s
See Also
--------
Expand All @@ -752,26 +759,29 @@ def to_string(
2 3 6
"""

formatter = fmt.DataFrameFormatter(
self,
columns=columns,
col_space=col_space,
na_rep=na_rep,
formatters=formatters,
float_format=float_format,
sparsify=sparsify,
justify=justify,
index_names=index_names,
header=header,
index=index,
min_rows=min_rows,
max_rows=max_rows,
max_cols=max_cols,
show_dimensions=show_dimensions,
decimal=decimal,
line_width=line_width,
)
return formatter.to_string(buf=buf)
from pandas import option_context

with option_context("display.max_colwidth", max_colwidth):
formatter = fmt.DataFrameFormatter(
self,
columns=columns,
col_space=col_space,
na_rep=na_rep,
formatters=formatters,
float_format=float_format,
sparsify=sparsify,
justify=justify,
index_names=index_names,
header=header,
index=index,
min_rows=min_rows,
max_rows=max_rows,
max_cols=max_cols,
show_dimensions=show_dimensions,
decimal=decimal,
line_width=line_width,
)
return formatter.to_string(buf=buf)

# ----------------------------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/clipboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover

if isinstance(obj, ABCDataFrame):
# str(df) has various unhelpful defaults, like truncation
with option_context("display.max_colwidth", 999999):
with option_context("display.max_colwidth", None):
objstr = obj.to_string(**kwargs)
else:
objstr = str(obj)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ def _write_header(self, indent: int) -> None:
self.write("</thead>", indent)

def _get_formatted_values(self) -> Dict[int, List[str]]:
with option_context("display.max_colwidth", 999999):
with option_context("display.max_colwidth", None):
fmt_values = {i: self.fmt._format_col(i) for i in range(self.ncols)}
return fmt_values

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/config/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ def test_validation(self):
self.cf.set_option("a", 2) # int is_int
self.cf.set_option("b.c", "wurld") # str is_str
self.cf.set_option("d", 2)
self.cf.set_option("d", None) # non-negative int can be None

# None not is_int
with pytest.raises(ValueError, match=msg):
Expand Down
39 changes: 39 additions & 0 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,45 @@ def test_str_max_colwidth(self):
"1 foo bar stuff 1"
)

def test_to_string_truncate(self):
# Checks three things about DataFrame.to_string():
#   1. called with no arguments it emits the full content of a long cell;
#   2. with the "max_colwidth" option set to 20 the output is unchanged;
#   3. with an explicit max_colwidth=20 argument the long cell is cut
#      and ends in "...".
# GH 9784 - don't truncate when calling DataFrame.to_string
df = pd.DataFrame(
[
{
"a": "foo",
"b": "bar",
"c": "let's make this a very VERY long line that is longer "
"than the default 50 character limit",
"d": 1,
},
{"a": "foo", "b": "bar", "c": "stuff", "d": 1},
]
)
# NOTE(review): the result of set_index is not assigned, so df appears to
# keep its default integer index (the expected strings below start with
# 0 / 1 and still list a, b, c as columns) - confirm whether this call is
# intended or can be dropped.
df.set_index(["a", "b", "c"])
# Default call: the long "c" value is rendered in full.
assert df.to_string() == (
" a b "
" c d\n"
"0 foo bar let's make this a very VERY long line t"
"hat is longer than the default 50 character limit 1\n"
"1 foo bar "
" stuff 1"
)
with option_context("max_colwidth", 20):
# the display option has no effect on the to_string method
assert df.to_string() == (
" a b "
" c d\n"
"0 foo bar let's make this a very VERY long line t"
"hat is longer than the default 50 character limit 1\n"
"1 foo bar "
" stuff 1"
)
# An explicit max_colwidth argument does truncate the long cell.
assert df.to_string(max_colwidth=20) == (
" a b c d\n"
"0 foo bar let's make this ... 1\n"
"1 foo bar stuff 1"
)

def test_auto_detect(self):
term_width, term_height = get_terminal_size()
fac = 1.05 # Arbitrary large factor to exceed term width
Expand Down

0 comments on commit 357c8a3

Please sign in to comment.