From af444f08ef10f30f90c2c904e0bf3294133b9558 Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Tue, 20 Aug 2019 20:29:06 -0500 Subject: [PATCH 01/20] Added a parameter to pass all the way down to specify max_colwidth. Not sure if I'll keep it. --- pandas/core/frame.py | 7 ++++++- pandas/io/formats/format.py | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 20398069847b1..f4ada3d843e56 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -637,6 +637,7 @@ def __repr__(self): max_rows = get_option("display.max_rows") min_rows = get_option("display.min_rows") max_cols = get_option("display.max_columns") + max_colwidth = get_option("display.max_colwidth") show_dimensions = get_option("display.show_dimensions") if get_option("display.expand_frame_repr"): width, _ = console.get_console_size() @@ -648,6 +649,7 @@ def __repr__(self): min_rows=min_rows, max_cols=max_cols, line_width=width, + max_colwidth=max_colwidth, show_dimensions=show_dimensions, ) @@ -707,11 +709,14 @@ def to_string( max_cols=None, show_dimensions=False, decimal=".", + max_colwidth=None, line_width=None, ): """ Render a DataFrame to a console-friendly tabular output. %(shared_params)s + max_colwidth : int, optional + Max width to truncate each column in characters. line_width : int, optional Width to wrap a line in characters. 
%(returns)s @@ -729,7 +734,6 @@ def to_string( 1 2 5 2 3 6 """ - formatter = fmt.DataFrameFormatter( self, columns=columns, @@ -747,6 +751,7 @@ def to_string( max_cols=max_cols, show_dimensions=show_dimensions, decimal=decimal, + max_colwidth=max_colwidth, line_width=line_width, ) return formatter.to_string(buf=buf) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 61af935bd8227..dc44f782164b2 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -545,6 +545,7 @@ def __init__( max_rows: Optional[int] = None, min_rows: Optional[int] = None, max_cols: Optional[int] = None, + max_colwidth: Optional[int] = None, show_dimensions: bool = False, decimal: str = ".", table_id: Optional[str] = None, @@ -570,6 +571,7 @@ def __init__( self.max_rows = max_rows self.min_rows = min_rows self.max_cols = max_cols + self.max_colwidth = max_colwidth self.max_rows_displayed = min(max_rows or len(self.frame), len(self.frame)) self.show_dimensions = show_dimensions self.table_id = table_id @@ -692,11 +694,12 @@ def _to_str_columns(self) -> List[List[str]]: stringified = [] for i, c in enumerate(frame): fmt_values = self._format_col(i) - fmt_values = _make_fixed_width( + mt_values = _make_fixed_width( fmt_values, self.justify, minimum=(self.col_space or 0), adj=self.adj, + max_colwidth=self.max_colwidth, ) stringified.append(fmt_values) else: @@ -728,7 +731,11 @@ def _to_str_columns(self) -> List[List[str]]: ) fmt_values = self._format_col(i) fmt_values = _make_fixed_width( - fmt_values, self.justify, minimum=header_colwidth, adj=self.adj + fmt_values, + self.justify, + minimum=header_colwidth, + max_colwidth=self.max_colwidth, + adj=self.adj, ) max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth) @@ -1688,6 +1695,7 @@ def _make_fixed_width( justify: str = "right", minimum: Optional[int] = None, adj: Optional[TextAdjustment] = None, + max_colwidth: Optional[int] = None, ) -> List[str]: if len(strings) == 0 or justify 
== "all": @@ -1705,6 +1713,10 @@ def _make_fixed_width( if conf_max is not None and max_len > conf_max: max_len = conf_max + # override the default if provided + if max_colwidth is not None: + max_len = max(max_len, max_colwidth) + def just(x): if conf_max is not None: if (conf_max > 3) & (adj.len(x) > max_len): From 00a43cc57440202806a2cdde3082fbada9933855 Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Tue, 20 Aug 2019 21:15:14 -0500 Subject: [PATCH 02/20] I have threaded the max_colwidth parameter all over the place, but I'm starting to feel a bit uncomfortable about it. The max_colwidth is an important feature for legibility in the vast majority of contexts - and one expects the display config setting to work. It is only when invoked at the highest level as to_string() that it should be unlimited. So even though this is a temp commit, I'm about to unwind it I think and try an approach at the top level only.: --- pandas/core/series.py | 11 +++++++++++ pandas/io/formats/format.py | 20 ++++++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c891298d6e499..bc2b82c9b11df 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1571,6 +1571,11 @@ def __repr__(self): if get_option("display.max_rows") == 0 else get_option("display.min_rows") ) + max_colwidth = ( + None + if get_option("display.max_colwidth") == 0 + else get_option("display.min_colwidth") + ) show_dimensions = get_option("display.show_dimensions") self.to_string( @@ -1579,6 +1584,7 @@ def __repr__(self): dtype=self.dtype, min_rows=min_rows, max_rows=max_rows, + max_colwidth=max_colwidth, length=show_dimensions, ) result = buf.getvalue() @@ -1597,6 +1603,7 @@ def to_string( name=False, max_rows=None, min_rows=None, + max_colwidth=None, ): """ Render a string representation of the Series. 
@@ -1626,6 +1633,9 @@ def to_string( min_rows : int, optional The number of rows to display in a truncated repr (when number of rows is above `max_rows`). + max_colwidth : int, optional + Maximum number of characters to show in each cell before truncating. + If None, show the full content. Returns ------- @@ -1644,6 +1654,7 @@ def to_string( float_format=float_format, min_rows=min_rows, max_rows=max_rows, + max_colwidth=max_colwidth, ) result = formatter.to_string() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index dc44f782164b2..41e9c0f875879 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -133,6 +133,9 @@ (when number of rows is above `max_rows`). max_cols : int, optional Maximum number of columns to display in the console. + max_colwidth : int, optional + Maximum number of characters to show in each cell before truncating. + If None, show the full content. show_dimensions : bool, default False Display DataFrame dimensions (number of rows by number of columns). decimal : str, default '.' 
@@ -239,6 +242,7 @@ def __init__( dtype: bool = True, max_rows: Optional[int] = None, min_rows: Optional[int] = None, + max_colwidth: Optional[int] = None, ): self.series = series self.buf = buf if buf is not None else StringIO() @@ -249,6 +253,7 @@ def __init__( self.index = index self.max_rows = max_rows self.min_rows = min_rows + self.max_colwidth = max_colwidth if float_format is None: float_format = get_option("display.float_format") @@ -336,6 +341,7 @@ def _get_formatted_index(self) -> Tuple[List[str], bool]: return fmt_index, have_header def _get_formatted_values(self) -> List[str]: + return format_array( self.tr_series._values, None, @@ -1025,7 +1031,10 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]: fmt_index = [ tuple( _make_fixed_width( - list(x), justify="left", minimum=(self.col_space or 0), adj=self.adj + list(x), + justify="left", + minimum=(self.col_space or 0), + adj=self.adjddddd, ) ) for x in fmt_index @@ -1709,13 +1718,8 @@ def _make_fixed_width( if minimum is not None: max_len = max(minimum, max_len) - conf_max = get_option("display.max_colwidth") - if conf_max is not None and max_len > conf_max: - max_len = conf_max - - # override the default if provided - if max_colwidth is not None: - max_len = max(max_len, max_colwidth) + if max_colwidth is not None and max_len > conf_max: + max_len = max_colwidth def just(x): if conf_max is not None: From 1ebf09169e2454195e3e537d921c4086b290c2ec Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Tue, 20 Aug 2019 21:30:36 -0500 Subject: [PATCH 03/20] Ok, I removed all the deep changes and parameter-passing. 
Instead, we have a quick override at the very top level, and everything else behaves based on that one override.# --- pandas/core/frame.py | 47 +++++++++++++++++++------------------ pandas/core/series.py | 11 --------- pandas/io/formats/format.py | 28 +++++----------------- 3 files changed, 30 insertions(+), 56 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f4ada3d843e56..ab6e1f9812ff3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -22,7 +22,6 @@ import numpy.ma as ma from pandas._config import get_option - from pandas._libs import algos as libalgos, lib from pandas.compat import PY36, raise_with_traceback from pandas.compat.numpy import function as nv @@ -709,7 +708,7 @@ def to_string( max_cols=None, show_dimensions=False, decimal=".", - max_colwidth=None, + max_colwidth=0, line_width=None, ): """ @@ -734,27 +733,29 @@ def to_string( 1 2 5 2 3 6 """ - formatter = fmt.DataFrameFormatter( - self, - columns=columns, - col_space=col_space, - na_rep=na_rep, - formatters=formatters, - float_format=float_format, - sparsify=sparsify, - justify=justify, - index_names=index_names, - header=header, - index=index, - min_rows=min_rows, - max_rows=max_rows, - max_cols=max_cols, - show_dimensions=show_dimensions, - decimal=decimal, - max_colwidth=max_colwidth, - line_width=line_width, - ) - return formatter.to_string(buf=buf) + + from pandas import option_context + with option_context('display.max_colwidth', max_colwidth): + formatter = fmt.DataFrameFormatter( + self, + columns=columns, + col_space=col_space, + na_rep=na_rep, + formatters=formatters, + float_format=float_format, + sparsify=sparsify, + justify=justify, + index_names=index_names, + header=header, + index=index, + min_rows=min_rows, + max_rows=max_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + decimal=decimal, + line_width=line_width, + ) + return formatter.to_string(buf=buf) # ---------------------------------------------------------------------- diff 
--git a/pandas/core/series.py b/pandas/core/series.py index bc2b82c9b11df..c891298d6e499 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1571,11 +1571,6 @@ def __repr__(self): if get_option("display.max_rows") == 0 else get_option("display.min_rows") ) - max_colwidth = ( - None - if get_option("display.max_colwidth") == 0 - else get_option("display.min_colwidth") - ) show_dimensions = get_option("display.show_dimensions") self.to_string( @@ -1584,7 +1579,6 @@ def __repr__(self): dtype=self.dtype, min_rows=min_rows, max_rows=max_rows, - max_colwidth=max_colwidth, length=show_dimensions, ) result = buf.getvalue() @@ -1603,7 +1597,6 @@ def to_string( name=False, max_rows=None, min_rows=None, - max_colwidth=None, ): """ Render a string representation of the Series. @@ -1633,9 +1626,6 @@ def to_string( min_rows : int, optional The number of rows to display in a truncated repr (when number of rows is above `max_rows`). - max_colwidth : int, optional - Maximum number of characters to show in each cell before truncating. - If None, show the full content. Returns ------- @@ -1654,7 +1644,6 @@ def to_string( float_format=float_format, min_rows=min_rows, max_rows=max_rows, - max_colwidth=max_colwidth, ) result = formatter.to_string() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 41e9c0f875879..61af935bd8227 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -133,9 +133,6 @@ (when number of rows is above `max_rows`). max_cols : int, optional Maximum number of columns to display in the console. - max_colwidth : int, optional - Maximum number of characters to show in each cell before truncating. - If None, show the full content. show_dimensions : bool, default False Display DataFrame dimensions (number of rows by number of columns). decimal : str, default '.' 
@@ -242,7 +239,6 @@ def __init__( dtype: bool = True, max_rows: Optional[int] = None, min_rows: Optional[int] = None, - max_colwidth: Optional[int] = None, ): self.series = series self.buf = buf if buf is not None else StringIO() @@ -253,7 +249,6 @@ def __init__( self.index = index self.max_rows = max_rows self.min_rows = min_rows - self.max_colwidth = max_colwidth if float_format is None: float_format = get_option("display.float_format") @@ -341,7 +336,6 @@ def _get_formatted_index(self) -> Tuple[List[str], bool]: return fmt_index, have_header def _get_formatted_values(self) -> List[str]: - return format_array( self.tr_series._values, None, @@ -551,7 +545,6 @@ def __init__( max_rows: Optional[int] = None, min_rows: Optional[int] = None, max_cols: Optional[int] = None, - max_colwidth: Optional[int] = None, show_dimensions: bool = False, decimal: str = ".", table_id: Optional[str] = None, @@ -577,7 +570,6 @@ def __init__( self.max_rows = max_rows self.min_rows = min_rows self.max_cols = max_cols - self.max_colwidth = max_colwidth self.max_rows_displayed = min(max_rows or len(self.frame), len(self.frame)) self.show_dimensions = show_dimensions self.table_id = table_id @@ -700,12 +692,11 @@ def _to_str_columns(self) -> List[List[str]]: stringified = [] for i, c in enumerate(frame): fmt_values = self._format_col(i) - mt_values = _make_fixed_width( + fmt_values = _make_fixed_width( fmt_values, self.justify, minimum=(self.col_space or 0), adj=self.adj, - max_colwidth=self.max_colwidth, ) stringified.append(fmt_values) else: @@ -737,11 +728,7 @@ def _to_str_columns(self) -> List[List[str]]: ) fmt_values = self._format_col(i) fmt_values = _make_fixed_width( - fmt_values, - self.justify, - minimum=header_colwidth, - max_colwidth=self.max_colwidth, - adj=self.adj, + fmt_values, self.justify, minimum=header_colwidth, adj=self.adj ) max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth) @@ -1031,10 +1018,7 @@ def _get_formatted_index(self, frame: "DataFrame") 
-> List[str]: fmt_index = [ tuple( _make_fixed_width( - list(x), - justify="left", - minimum=(self.col_space or 0), - adj=self.adjddddd, + list(x), justify="left", minimum=(self.col_space or 0), adj=self.adj ) ) for x in fmt_index @@ -1704,7 +1688,6 @@ def _make_fixed_width( justify: str = "right", minimum: Optional[int] = None, adj: Optional[TextAdjustment] = None, - max_colwidth: Optional[int] = None, ) -> List[str]: if len(strings) == 0 or justify == "all": @@ -1718,8 +1701,9 @@ def _make_fixed_width( if minimum is not None: max_len = max(minimum, max_len) - if max_colwidth is not None and max_len > conf_max: - max_len = max_colwidth + conf_max = get_option("display.max_colwidth") + if conf_max is not None and max_len > conf_max: + max_len = conf_max def just(x): if conf_max is not None: From 21bbf64f672fbe5d65ee32e2d300991ad6bf3d9c Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Tue, 20 Aug 2019 21:47:28 -0500 Subject: [PATCH 04/20] For some reason, the truncation switches justification if the max_colwidth is 0 instead of a large number. So I set it to a large number (like the html diff) to preserve the justification behavior. --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ab6e1f9812ff3..026f3e92aa9c9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -708,14 +708,14 @@ def to_string( max_cols=None, show_dimensions=False, decimal=".", - max_colwidth=0, + max_colwidth=9999999, line_width=None, ): """ Render a DataFrame to a console-friendly tabular output. %(shared_params)s max_colwidth : int, optional - Max width to truncate each column in characters. + Max width to truncate each column in characters. By default, no limit. line_width : int, optional Width to wrap a line in characters. 
%(returns)s From cfde48ed9d203ff962ba15c0c0ef2202e6bf2730 Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Tue, 20 Aug 2019 21:56:58 -0500 Subject: [PATCH 05/20] Added a test to show that this option exists for to_string --- pandas/tests/io/formats/test_format.py | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index a048e3bb867bd..d451134a8fc79 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -520,6 +520,38 @@ def test_str_max_colwidth(self): "1 foo bar stuff 1" ) + def test_to_string_truncate(self): + # GH 9784 - dont truncate when calling DataFrame.to_string + df = pd.DataFrame( + [ + { + "a": "foo", + "b": "bar", + "c": "let's make this a very VERY long line that is longer than the default 50 character limit", + "d": 1, + }, + {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, + ] + ) + df.set_index(["a", "b", "c"]) + assert df.to_string() == ( + " a b c d\n" + "0 foo bar let's make this a very VERY long line that is longer than the default 50 character limit 1\n" + "1 foo bar stuff 1" + ) + with option_context("max_colwidth", 20): + # the display option has no effect on the to_string method + assert df.to_string() == ( + " a b c d\n" + "0 foo bar let's make this a very VERY long line that is longer than the default 50 character limit 1\n" + "1 foo bar stuff 1" + ) + assert df.to_string(max_colwidth=20) == ( + " a b c d\n" + "0 foo bar let's make this ... 1\n" + "1 foo bar stuff 1" + ) + def test_auto_detect(self): term_width, term_height = get_terminal_size() fac = 1.05 # Arbitrary large factor to exceed term width From 1abc2fab658368d00cc967e48188c9596712363c Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Tue, 20 Aug 2019 22:19:34 -0500 Subject: [PATCH 06/20] Shortened one line (split across two). 
It's hard to actually shorten the expected values though because the fixed width will be harder to read if the lines are split, and they kind of have to be long to test the truncation... --- pandas/tests/io/formats/test_format.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index d451134a8fc79..d28973f7d6e20 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -527,7 +527,8 @@ def test_to_string_truncate(self): { "a": "foo", "b": "bar", - "c": "let's make this a very VERY long line that is longer than the default 50 character limit", + "c": "let's make this a very VERY long line that is longer " + "than the default 50 character limit", "d": 1, }, {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, From a1d3832b2bf70299eec604894857831ead12fddf Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Wed, 21 Aug 2019 11:12:56 -0500 Subject: [PATCH 07/20] Shortened all the lines even in the test to comply with PEP8 --- pandas/tests/io/formats/test_format.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index d28973f7d6e20..459437a1f333e 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -536,16 +536,22 @@ def test_to_string_truncate(self): ) df.set_index(["a", "b", "c"]) assert df.to_string() == ( - " a b c d\n" - "0 foo bar let's make this a very VERY long line that is longer than the default 50 character limit 1\n" - "1 foo bar stuff 1" + " a b " + " c d\n" + "0 foo bar let's make this a very VERY long line t" + "hat is longer than the default 50 character limit 1\n" + "1 foo bar " + " stuff 1" ) with option_context("max_colwidth", 20): # the display option has no effect on the to_string method assert df.to_string() == ( - " a b c d\n" - "0 foo bar let's make this 
a very VERY long line that is longer than the default 50 character limit 1\n" - "1 foo bar stuff 1" + " a b " + " c d\n" + "0 foo bar let's make this a very VERY long line t" + "hat is longer than the default 50 character limit 1\n" + "1 foo bar " + " stuff 1" ) assert df.to_string(max_colwidth=20) == ( " a b c d\n" From 3cf9a6aa5f5553535d69a80c510f53ab20c57d5a Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Thu, 22 Aug 2019 10:21:26 -0500 Subject: [PATCH 08/20] Adding a newline per suggestion from isort --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 026f3e92aa9c9..e676bd1add288 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -22,6 +22,7 @@ import numpy.ma as ma from pandas._config import get_option + from pandas._libs import algos as libalgos, lib from pandas.compat import PY36, raise_with_traceback from pandas.compat.numpy import function as nv From 762d6779cfafdccd972c2c96640751d58e01f764 Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Thu, 22 Aug 2019 11:08:06 -0500 Subject: [PATCH 09/20] Solved the justify problem, and also added some None value for the max_colwidth --- doc/source/user_guide/options.rst | 2 +- pandas/core/config_init.py | 6 +++--- pandas/core/frame.py | 2 +- pandas/io/clipboards.py | 2 +- pandas/io/formats/format.py | 4 +++- pandas/io/formats/html.py | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst index f32a8adfd4d33..25ae32d12065e 100644 --- a/doc/source/user_guide/options.rst +++ b/doc/source/user_guide/options.rst @@ -353,7 +353,7 @@ display.max_colwidth 50 The maximum width in charac a column in the repr of a pandas data structure. When the column overflows, a "..." placeholder is embedded in - the output. + the output. 'None' value means unlimited. 
display.max_info_columns 100 max_info_columns is used in DataFrame.info method to decide if per column information will be printed. diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 08dce6aca6e6d..0d3afacc6a229 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -150,10 +150,10 @@ def use_numexpr_cb(key): """ max_colwidth_doc = """ -: int +: int or None The maximum width in characters of a column in the repr of a pandas data structure. When the column overflows, a "..." - placeholder is embedded in the output. + placeholder is embedded in the output. A 'None' value means unlimited. """ colheader_justify_doc = """ @@ -342,7 +342,7 @@ def is_terminal(): validator=is_instance_factory([type(None), int]), ) cf.register_option("max_categories", 8, pc_max_categories_doc, validator=is_int) - cf.register_option("max_colwidth", 50, max_colwidth_doc, validator=is_int) + cf.register_option("max_colwidth", 50, max_colwidth_doc) if is_terminal(): max_cols = 0 # automatically determine optimal number of columns else: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e676bd1add288..525596d595aa1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -709,7 +709,7 @@ def to_string( max_cols=None, show_dimensions=False, decimal=".", - max_colwidth=9999999, + max_colwidth=None, line_width=None, ): """ diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index d38221d784273..30e5179645f6b 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -128,7 +128,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover if isinstance(obj, ABCDataFrame): # str(df) has various unhelpful defaults, like truncation - with option_context("display.max_colwidth", 999999): + with option_context("display.max_colwidth", None): objstr = obj.to_string(**kwargs) else: objstr = str(obj) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 61af935bd8227..d4cad2adb8631 
100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -341,6 +341,7 @@ def _get_formatted_values(self) -> List[str]: None, float_format=self.float_format, na_rep=self.na_rep, + justify=self.justify, ) def to_string(self) -> str: @@ -912,6 +913,7 @@ def _format_col(self, i: int) -> List[str]: na_rep=self.na_rep, space=self.col_space, decimal=self.decimal, + justify=self.justify, ) def to_html( @@ -1685,7 +1687,7 @@ def _formatter(x): def _make_fixed_width( strings: List[str], - justify: str = "right", + justify: str = "default", minimum: Optional[int] = None, adj: Optional[TextAdjustment] = None, ) -> List[str]: diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 4b44893df70ed..63387e136266c 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -377,7 +377,7 @@ def _write_header(self, indent: int) -> None: self.write("", indent) def _get_formatted_values(self) -> Dict[int, List[str]]: - with option_context("display.max_colwidth", 999999): + with option_context("display.max_colwidth", None): fmt_values = {i: self.fmt._format_col(i) for i in range(self.ncols)} return fmt_values From d64fcb804d85c1cb99bdbccb9807f2e839fc1ae5 Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Thu, 22 Aug 2019 11:58:25 -0500 Subject: [PATCH 10/20] Swap out format to be None, ignore justification issues. 
--- pandas/io/formats/format.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index d4cad2adb8631..61af935bd8227 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -341,7 +341,6 @@ def _get_formatted_values(self) -> List[str]: None, float_format=self.float_format, na_rep=self.na_rep, - justify=self.justify, ) def to_string(self) -> str: @@ -913,7 +912,6 @@ def _format_col(self, i: int) -> List[str]: na_rep=self.na_rep, space=self.col_space, decimal=self.decimal, - justify=self.justify, ) def to_html( @@ -1687,7 +1685,7 @@ def _formatter(x): def _make_fixed_width( strings: List[str], - justify: str = "default", + justify: str = "right", minimum: Optional[int] = None, adj: Optional[TextAdjustment] = None, ) -> List[str]: From 6e792f8a2d23ec6f8958c4df9e9fa15971cd4055 Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Thu, 22 Aug 2019 11:59:47 -0500 Subject: [PATCH 11/20] Reformat with black.
--- pandas/core/frame.py | 3 ++- pandas/tests/io/formats/test_format.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 525596d595aa1..15bb86a506dfc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -736,7 +736,8 @@ def to_string( """ from pandas import option_context - with option_context('display.max_colwidth', max_colwidth): + + with option_context("display.max_colwidth", max_colwidth): formatter = fmt.DataFrameFormatter( self, columns=columns, diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 459437a1f333e..cef191807ee2d 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -528,7 +528,7 @@ def test_to_string_truncate(self): "a": "foo", "b": "bar", "c": "let's make this a very VERY long line that is longer " - "than the default 50 character limit", + "than the default 50 character limit", "d": 1, }, {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, From 3116ea1af4f7dedb91e35f4d0590432c2fe89981 Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Mon, 26 Aug 2019 22:16:24 -0500 Subject: [PATCH 12/20] Use the is_nonnegative_int validator for the max_colwidth param. I didn't realize that this also allowed None until checking the docs, but it does so it's the perfect validator for our new parameter. 
--- pandas/core/config_init.py | 2 +- pandas/tests/config/test_config.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 0d3afacc6a229..9734758b40f69 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -342,7 +342,7 @@ def is_terminal(): validator=is_instance_factory([type(None), int]), ) cf.register_option("max_categories", 8, pc_max_categories_doc, validator=is_int) - cf.register_option("max_colwidth", 50, max_colwidth_doc) + cf.register_option("max_colwidth", 50, max_colwidth_doc, validator=is_nonnegative_int) if is_terminal(): max_cols = 0 # automatically determine optimal number of columns else: diff --git a/pandas/tests/config/test_config.py b/pandas/tests/config/test_config.py index efaeb7b1471ec..ac273c3404928 100644 --- a/pandas/tests/config/test_config.py +++ b/pandas/tests/config/test_config.py @@ -218,6 +218,7 @@ def test_validation(self): self.cf.set_option("a", 2) # int is_int self.cf.set_option("b.c", "wurld") # str is_str self.cf.set_option("d", 2) + self.cf.set_option("d", None) # non-negative int can be None # None not is_int with pytest.raises(ValueError, match=msg): From 840c1a68fb775604d8c4ad46589d42fdd3203729 Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Mon, 26 Aug 2019 22:24:42 -0500 Subject: [PATCH 13/20] Added entry to whatsnew. 
--- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7fe358d3820f2..e1cc3e796477f 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -161,7 +161,7 @@ I/O - :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`) - Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`) -- +- Bug in :func:`DataFrame.to_string()` where values were truncated using display options instead of outputting the full content, added new param ``max_colwidth`` instead (:issue:`9784`) Plotting ^^^^^^^^ From 2c68bbc4944cf44accbe69aefe40bcdce947f82e Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Mon, 26 Aug 2019 22:25:56 -0500 Subject: [PATCH 14/20] Fixed formatting with black. --- pandas/core/config_init.py | 4 +++- pandas/tests/config/test_config.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 9734758b40f69..bcb671c6d5e7a 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -342,7 +342,9 @@ def is_terminal(): validator=is_instance_factory([type(None), int]), ) cf.register_option("max_categories", 8, pc_max_categories_doc, validator=is_int) - cf.register_option("max_colwidth", 50, max_colwidth_doc, validator=is_nonnegative_int) + cf.register_option( + "max_colwidth", 50, max_colwidth_doc, validator=is_nonnegative_int + ) if is_terminal(): max_cols = 0 # automatically determine optimal number of columns else: diff --git a/pandas/tests/config/test_config.py b/pandas/tests/config/test_config.py index ac273c3404928..51640641c78e6 100644 --- a/pandas/tests/config/test_config.py +++ b/pandas/tests/config/test_config.py @@ -218,7 +218,7 @@ def test_validation(self): 
self.cf.set_option("a", 2) # int is_int self.cf.set_option("b.c", "wurld") # str is_str self.cf.set_option("d", 2) - self.cf.set_option("d", None) # non-negative int can be None + self.cf.set_option("d", None) # non-negative int can be None # None not is_int with pytest.raises(ValueError, match=msg): From 4e7fe8204f02e57f9ea6568f5aa8ac095474e8df Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Wed, 28 Aug 2019 08:29:32 -0500 Subject: [PATCH 15/20] Split whatsnew entry, add versionadded, reorder params --- doc/source/whatsnew/v1.0.0.rst | 4 ++-- pandas/core/frame.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index e1cc3e796477f..1e40a6f16035c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -21,7 +21,7 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ -- +- :meth:`DataFrame.to_string` added new optional param ``max_colwidth`` to avoid use of display parameters (:issue:`9784`) - .. 
_whatsnew_1000.enhancements.other: @@ -161,7 +161,7 @@ I/O - :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`) - Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`) -- Bug in :func:`DataFrame.to_string()` where values were truncated using display options instead of outputting the full content, added new param ``max_colwidth`` instead (:issue:`9784`) +- Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`) Plotting ^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cf8f5c0e59efd..c7124084b02f7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -713,16 +713,19 @@ def to_string( max_cols=None, show_dimensions=False, decimal=".", - max_colwidth=None, line_width=None, + max_colwidth=None, ): """ Render a DataFrame to a console-friendly tabular output. %(shared_params)s - max_colwidth : int, optional - Max width to truncate each column in characters. By default, no limit. line_width : int, optional Width to wrap a line in characters. + max_colwidth : int, optional + Max width to truncate each column in characters. By default, no limit. + + .. versionadded:: 1.0.0 + %(returns)s See Also -------- From 90f0ee0f25bd181c9e57bbeb72b80131c612262d Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Wed, 28 Aug 2019 09:08:11 -0500 Subject: [PATCH 16/20] Remove double line break --- pandas/core/frame.py | 1 - scripts/validate_docstrings.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c7124084b02f7..858d5164d2fed 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -725,7 +725,6 @@ def to_string( Max width to truncate each column in characters. By default, no limit. .. 
versionadded:: 1.0.0 - %(returns)s See Also -------- diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index bf5d861281a36..3879cf4ebfa7e 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -29,7 +29,7 @@ import ast import textwrap -import flake8.main.application +from flake8.main import application try: from io import StringIO From c2b8421b593ef8142410872b024e3ab147d1b11a Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Wed, 28 Aug 2019 09:11:18 -0500 Subject: [PATCH 17/20] Oops, didn't mean to update this script --- scripts/validate_docstrings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 3879cf4ebfa7e..bf5d861281a36 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -29,7 +29,7 @@ import ast import textwrap -from flake8.main import application +import flake8.main.application try: from io import StringIO From da031f2acef4b3f0c19a3e809774421b86e6dc59 Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Fri, 30 Aug 2019 11:47:14 -0500 Subject: [PATCH 18/20] Correct word in whatsnew. --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 19da577676f7c..d6370ca442673 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -21,7 +21,7 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ -- :meth:`DataFrame.to_string` added new optional param ``max_colwidth`` to avoid use of display parameters (:issue:`9784`) +- :meth:`DataFrame.to_string` added new optional param ``max_colwidth`` to avoid use of display options (:issue:`9784`) - .. 
_whatsnew_1000.enhancements.other: From 0f8119ee0ea56dad49a04bf0176be6c2049ad5f8 Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Fri, 30 Aug 2019 20:38:11 -0500 Subject: [PATCH 19/20] Update doc/source/whatsnew/v1.0.0.rst Co-Authored-By: Tom Augspurger --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 6cbca3d7c5928..65d8cfe4ebbb6 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -21,7 +21,7 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ -- :meth:`DataFrame.to_string` added new optional param ``max_colwidth`` to avoid use of display options (:issue:`9784`) +- :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`) - .. _whatsnew_1000.enhancements.other: From b23da91fdedcc48823c5daa92da7e2de89a4fd32 Mon Sep 17 00:00:00 2001 From: Luke Shepard Date: Sat, 14 Sep 2019 09:07:24 -0500 Subject: [PATCH 20/20] Resolve conflict correctly. --- doc/source/whatsnew/v1.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index af6038f8d80fa..c78e27f098f13 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -21,7 +21,6 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ -- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) - :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`) -