diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst index 1f1dff417e68f3..a6491c66456132 100644 --- a/doc/source/user_guide/options.rst +++ b/doc/source/user_guide/options.rst @@ -353,7 +353,7 @@ display.max_colwidth 50 The maximum width in charac a column in the repr of a pandas data structure. When the column overflows, a "..." placeholder is embedded in - the output. + the output. A 'None' value means unlimited. display.max_info_columns 100 max_info_columns is used in DataFrame.info method to decide if per column information will be printed. diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 5b9e3a7dbad061..c78e27f098f13b 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -21,6 +21,8 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ +- :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`) +- .. _whatsnew_1000.enhancements.other: @@ -191,6 +193,7 @@ I/O - Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`) - Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`) - Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. 
(:issue:`25099`) +- Bug in :meth:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`) Plotting ^^^^^^^^ diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index dfc80140433f8e..bc2eb3511629da 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -148,10 +148,10 @@ def use_numexpr_cb(key): """ max_colwidth_doc = """ -: int +: int or None The maximum width in characters of a column in the repr of a pandas data structure. When the column overflows, a "..." - placeholder is embedded in the output. + placeholder is embedded in the output. A 'None' value means unlimited. """ colheader_justify_doc = """ @@ -340,7 +340,9 @@ def is_terminal(): validator=is_instance_factory([type(None), int]), ) cf.register_option("max_categories", 8, pc_max_categories_doc, validator=is_int) - cf.register_option("max_colwidth", 50, max_colwidth_doc, validator=is_int) + cf.register_option( + "max_colwidth", 50, max_colwidth_doc, validator=is_nonnegative_int + ) if is_terminal(): max_cols = 0 # automatically determine optimal number of columns else: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f1ed3a125f60c1..44d3d840016fe1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -641,6 +641,7 @@ def __repr__(self): max_rows = get_option("display.max_rows") min_rows = get_option("display.min_rows") max_cols = get_option("display.max_columns") + max_colwidth = get_option("display.max_colwidth") show_dimensions = get_option("display.show_dimensions") if get_option("display.expand_frame_repr"): width, _ = console.get_console_size() @@ -652,6 +653,7 @@ def __repr__(self): min_rows=min_rows, max_cols=max_cols, line_width=width, + max_colwidth=max_colwidth, show_dimensions=show_dimensions, ) @@ -730,12 +732,17 @@ def to_string( show_dimensions=False, decimal=".", line_width=None, + max_colwidth=None, ): """ Render a DataFrame to a console-friendly tabular output. 
%(shared_params)s line_width : int, optional Width to wrap a line in characters. + max_colwidth : int, optional + Max width to truncate each column in characters. By default, no limit. + + .. versionadded:: 1.0.0 %(returns)s See Also -------- @@ -752,26 +759,29 @@ def to_string( 2 3 6 """ - formatter = fmt.DataFrameFormatter( - self, - columns=columns, - col_space=col_space, - na_rep=na_rep, - formatters=formatters, - float_format=float_format, - sparsify=sparsify, - justify=justify, - index_names=index_names, - header=header, - index=index, - min_rows=min_rows, - max_rows=max_rows, - max_cols=max_cols, - show_dimensions=show_dimensions, - decimal=decimal, - line_width=line_width, - ) - return formatter.to_string(buf=buf) + from pandas import option_context + + with option_context("display.max_colwidth", max_colwidth): + formatter = fmt.DataFrameFormatter( + self, + columns=columns, + col_space=col_space, + na_rep=na_rep, + formatters=formatters, + float_format=float_format, + sparsify=sparsify, + justify=justify, + index_names=index_names, + header=header, + index=index, + min_rows=min_rows, + max_rows=max_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + decimal=decimal, + line_width=line_width, + ) + return formatter.to_string(buf=buf) # ---------------------------------------------------------------------- diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 76c01535a26e79..518b940ec5da3d 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -131,7 +131,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover if isinstance(obj, ABCDataFrame): # str(df) has various unhelpful defaults, like truncation - with option_context("display.max_colwidth", 999999): + with option_context("display.max_colwidth", None): objstr = obj.to_string(**kwargs) else: objstr = str(obj) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 8c4a7f4a1213d9..50fa4796f8d722 100644 --- 
a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -377,7 +377,7 @@ def _write_header(self, indent: int) -> None: self.write("", indent) def _get_formatted_values(self) -> Dict[int, List[str]]: - with option_context("display.max_colwidth", 999999): + with option_context("display.max_colwidth", None): fmt_values = {i: self.fmt._format_col(i) for i in range(self.ncols)} return fmt_values diff --git a/pandas/tests/config/test_config.py b/pandas/tests/config/test_config.py index efaeb7b1471eca..51640641c78e69 100644 --- a/pandas/tests/config/test_config.py +++ b/pandas/tests/config/test_config.py @@ -218,6 +218,7 @@ def test_validation(self): self.cf.set_option("a", 2) # int is_int self.cf.set_option("b.c", "wurld") # str is_str self.cf.set_option("d", 2) + self.cf.set_option("d", None) # non-negative int can be None # None not is_int with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index c0451a0672c89f..454e2afb8abe01 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -527,6 +527,45 @@ def test_str_max_colwidth(self): "1 foo bar stuff 1" ) + def test_to_string_truncate(self): + # GH 9784 - don't truncate when calling DataFrame.to_string + df = pd.DataFrame( + [ + { + "a": "foo", + "b": "bar", + "c": "let's make this a very VERY long line that is longer " + "than the default 50 character limit", + "d": 1, + }, + {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, + ] + ) + df.set_index(["a", "b", "c"]) + assert df.to_string() == ( + " a b " + " c d\n" + "0 foo bar let's make this a very VERY long line t" + "hat is longer than the default 50 character limit 1\n" + "1 foo bar " + " stuff 1" + ) + with option_context("max_colwidth", 20): + # the display option has no effect on the to_string method + assert df.to_string() == ( + " a b " + " c d\n" + "0 foo bar let's make this a very VERY long line t" + "hat is longer than the 
default 50 character limit 1\n" + "1 foo bar " + " stuff 1" + ) + assert df.to_string(max_colwidth=20) == ( + " a b c d\n" + "0 foo bar let's make this ... 1\n" + "1 foo bar stuff 1" + ) + def test_auto_detect(self): term_width, term_height = get_terminal_size() fac = 1.05 # Arbitrary large factor to exceed term width