diff --git a/doc/source/io.rst b/doc/source/io.rst index 0c4e097a1ba0a..496facbe91f3e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -278,7 +278,7 @@ over the string representation of the object. All arguments are optional: - ``buf`` default None, for example a StringIO object - ``columns`` default None, which columns to write - - ``colSpace`` default None, number of spaces to write between columns + - ``col_space`` default None, number of spaces to write between columns - ``na_rep`` default ``NaN``, representation of NA value - ``formatters`` default None, a dictionary (by column) of functions each of which takes a single argument and returns a formatted string @@ -288,6 +288,8 @@ over the string representation of the object. All arguments are optional: - ``sparsify`` default True, set to False for a DataFrame with a hierarchical index to print every multiindex key at each row. - ``index_names`` default True, will print the names of the indices + - ``index`` default True, will print the index (i.e., row labels) + - ``header`` default True, will print the column labels The Series object also has a ``to_string`` method, but with only the ``buf``, ``na_rep``, ``float_format`` arguments. There is also a ``length`` argument diff --git a/pandas/core/common.py b/pandas/core/common.py index 06c794bdbd550..2e6e7e233a011 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -360,8 +360,8 @@ def _try_sort(iterable): except Exception: return listed -def set_printoptions(precision=None, column_space=None, max_rows=None, - max_columns=None): +def set_printoptions(precision=None, column_space=None, max_rows=None, + max_columns=None, justify='right'): """ Alter default behavior of DataFrame.toString @@ -376,8 +376,11 @@ def set_printoptions(precision=None, column_space=None, max_rows=None, Either one, or both can be set to 0 (experimental). Pandas will figure out how big the terminal is and will not display more rows or/and columns that can fit on it.
+ justify : string + 'right' or 'left' to justify the values of the dataframe using this + alignment """ - global _float_format, _column_space, _max_rows, _max_columns + global _float_format, _column_space, _max_rows, _max_columns, _justify if precision is not None: float_format = '%.' + '%d' % precision + 'g' _float_format = lambda x: float_format % x @@ -387,6 +390,8 @@ def set_printoptions(precision=None, column_space=None, max_rows=None, _max_rows = max_rows if max_columns is not None: _max_columns = max_columns + if justify is not None and justify in ('right', 'left'): + _justify = justify class EngFormatter(object): """ @@ -491,33 +496,11 @@ def set_eng_float_format(precision=3, use_eng_prefix=False): _float_format = EngFormatter(precision, use_eng_prefix) _column_space = max(12, precision + 9) -_float_format = lambda x: '%.4g' % x +_float_format = lambda x: '% .4f' % x _column_space = 12 _max_rows = 500 _max_columns = 0 - -def _pfixed(s, space, na_rep=None, float_format=None): - if isinstance(s, float): - if na_rep is not None and isnull(s): - if np.isnan(s): - s = na_rep - return (' %s' % s).ljust(space) - - if float_format: - formatted = float_format(s) - else: - is_neg = s < 0 - formatted = _float_format(np.abs(s)) - - if is_neg: - formatted = '-' + formatted - else: - formatted = ' ' + formatted - - return formatted.ljust(space) - else: - stringified = _stringify(s) - return (' %s' % stringified)[:space].ljust(space) +_justify = 'right' def _stringify(col): # unicode workaround @@ -526,29 +509,31 @@ def _stringify(col): else: return '%s' % col -def _format(s, na_rep=None, float_format=None): +def _format(s, space=None, na_rep=None, float_format=None): + def _just_help(x): + if space is None: + return x + if _justify == 'right': + return x[:space].rjust(space) + else: + return x[:space].ljust(space) + if isinstance(s, float): if na_rep is not None and isnull(s): if np.isnan(s): s = na_rep - return ' %s' % s + return _just_help('%s' % s) if float_format: 
formatted = float_format(s) else: - is_neg = s < 0 - formatted = _float_format(np.abs(s)) - - if is_neg: - formatted = '-' + formatted - else: - formatted = ' ' + formatted + formatted = _float_format(s) - return formatted + return _just_help(formatted) else: - return ' %s' % _stringify(s) + return _just_help('%s' % _stringify(s)) -#------------------------------------------------------------------------------- +#------------------------------------------------------------------------------ # miscellaneous python tools def rands(n): @@ -564,14 +549,22 @@ def adjoin(space, *lists): """ outLines = [] newLists = [] - lengths = [max(map(len, x)) + space for x in lists[:-1]] - # not the last one - lengths.append(max(map(len, lists[-1]))) + if _justify == 'right': + # everyone but the first one, add space (right-aligned) + lengths = [max(map(len, x)) + space for x in lists[1:]] + lengths.insert(0, max(map(len, lists[0]))) + else: + # everyone but the last one, add space (left-aligned) + lengths = [max(map(len, x)) + space for x in lists[:-1]] + lengths.append(max(map(len, lists[-1]))) maxLen = max(map(len, lists)) for i, lst in enumerate(lists): - nl = [x.ljust(lengths[i]) for x in lst] + if _justify == 'right': + nl = [x.rjust(lengths[i]) for x in lst] + else: + nl = [x.ljust(lengths[i]) for x in lst] nl.extend([' ' * lengths[i]] * (maxLen - len(lst))) newLists.append(nl) toJoin = zip(*newLists) @@ -691,6 +684,9 @@ def is_integer_dtype(arr): def is_float_dtype(arr): return issubclass(arr.dtype.type, np.floating) +def is_numeric_dtype(arr): + return is_integer_dtype(arr) or is_float_dtype(arr) + def save(obj, path): """ Pickle (serialize) object to input file path diff --git a/pandas/core/format.py b/pandas/core/format.py index 0b1905a91e3dc..bf058c91b9b6c 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1,19 +1,50 @@ from StringIO import StringIO -from pandas.core.common import adjoin, _pfixed +from pandas.core.common import adjoin, is_numeric_dtype 
from pandas.core.index import MultiIndex, _ensure_index +docstring_to_string = """ + Parameters + ---------- + frame : DataFrame + object to render + buf : StringIO-like, optional + buffer to write to + columns : sequence, optional + the subset of columns to write; default None writes all columns + col_space : int, optional + the width of each column + header : bool, optional + whether to print column labels, default True + index : bool, optional + whether to print index (row) labels, default True + na_rep : string, optional + string representation of NaN to use, default 'NaN' + formatters : list or dict of one-parameter functions, optional + formatter functions to apply to columns' elements by position or name, + default None + float_format : one-parameter function, optional + formatter function to apply to columns' elements if they are floats + default None + sparsify : bool, optional + Set to False for a DataFrame with a hierarchical index to print every + multiindex key at each row, default True + index_names : bool, optional + Prints the names of the indexes, default True """ class DataFrameFormatter(object): """ Render a DataFrame self.to_string() : console-friendly tabular output - self.to_html() : html table + self.to_html() : html table + """ - def __init__(self, frame, buf=None, columns=None, col_space=None, - na_rep='NaN', formatters=None, float_format=None, - sparsify=True, index_names=True): + __doc__ += docstring_to_string + + def __init__(self, frame, buf=None, columns=None, col_space=None, + header=True, index=True, na_rep='NaN', formatters=None, + float_format=None, sparsify=True, index_names=True): self.frame = frame self.buf = buf if buf is not None else StringIO() self.show_index_names = index_names @@ -22,6 +53,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self.formatters = formatters self.na_rep = na_rep self.col_space = col_space + self.header = header + self.index = index if columns is not None: self.columns = 
_ensure_index(columns) @@ -47,10 +80,16 @@ def to_string(self): str_index = self._get_formatted_index() str_columns = self._get_formatted_column_labels() - stringified = [str_columns[i] + format_col(c) - for i, c in enumerate(self.columns)] + if self.header: + stringified = [str_columns[i] + format_col(c) + for i, c in enumerate(self.columns)] + else: + stringified = [format_col(c) for c in self.columns] - to_write.append(adjoin(1, str_index, *stringified)) + if self.index: + to_write.append(adjoin(1, str_index, *stringified)) + else: + to_write.append(adjoin(1, *stringified)) for s in to_write: if isinstance(s, unicode): @@ -114,17 +153,21 @@ def _column_header(): write(buf, '', indent + indent_delta) else: indent += indent_delta - write(buf, '', indent) - row = [] # header row - col_row = _column_header() - indent += indent_delta - write_tr(buf, col_row, indent, indent_delta, header=True) - if self.has_index_names: - row = frame.index.names + [''] * len(frame.columns) - write_tr(buf, row, indent, indent_delta, header=True) - write(buf, '', indent) + if self.header: + write(buf, '', indent) + row = [] + + col_row = _column_header() + indent += indent_delta + write_tr(buf, col_row, indent, indent_delta, header=True) + if self.has_index_names: + row = frame.index.names + [''] * len(frame.columns) + write_tr(buf, row, indent, indent_delta, header=True) + + write(buf, '', indent) + write(buf, '', indent) # write values @@ -148,14 +191,9 @@ def _get_column_formatter(self): col_space = self.col_space - if col_space is None: - def _myformat(v): - return _format(v, na_rep=self.na_rep, - float_format=self.float_format) - else: - def _myformat(v): - return _pfixed(v, col_space, na_rep=self.na_rep, - float_format=self.float_format) + def _myformat(v): + return _format(v, space=col_space, na_rep=self.na_rep, + float_format=self.float_format) formatters = {} if self.formatters is None else self.formatters @@ -171,16 +209,24 @@ def _format_col(col, i=None): def 
_get_formatted_column_labels(self): from pandas.core.index import _sparsify + formatters = self.formatters + if formatters is None: + formatters = {} + if isinstance(self.columns, MultiIndex): fmt_columns = self.columns.format(sparsify=False, adjoin=False) - str_columns = zip(*[[' %s' % y for y in x] + str_columns = zip(*[[' %s' % y if y not in formatters and is_numeric_dtype(self.frame[x]) + else str(y) + for y in x] for x in zip(*fmt_columns)]) if self.sparsify: str_columns = _sparsify(str_columns) str_columns = [list(x) for x in zip(*str_columns)] else: - str_columns = [[' %s' % x] for x in self.columns.format()] + str_columns = [[' %s' % x if x not in formatters and is_numeric_dtype(self.frame[x]) + else str(x)] + for x in self.columns.format()] if self.show_index_names and self.has_index_names: for x in str_columns: @@ -201,7 +247,7 @@ def _get_formatted_index(self): columns = self.frame.columns show_index_names = self.show_index_names and self.has_index_names - show_col_names = self.show_index_names and self.has_column_names + show_col_names = (self.show_index_names and self.has_column_names) if isinstance(index, MultiIndex): fmt_index = index.format(sparsify=self.sparsify, adjoin=False, @@ -213,11 +259,14 @@ def _get_formatted_index(self): # empty space for columns if show_col_names: - col_header = [' %s' % x for x in self._get_column_name_list()] + col_header = ['%s' % x for x in self._get_column_name_list()] else: col_header = [''] * columns.nlevels - return col_header + adjoined + if self.header: + return col_header + adjoined + else: + return adjoined def _get_column_name_list(self): names = [] @@ -229,7 +278,6 @@ def _get_column_name_list(self): names.append('' if columns.name is None else columns.name) return names - def single_column_table(column): table = '' for i in column: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4d7fc50dc0d69..998f828ae3d92 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -32,7 +32,9 @@ from 
pandas.core.series import Series from pandas.util import py3compat from pandas.util.terminal import get_terminal_size -from pandas.util.decorators import deprecate +from pandas.util.decorators import deprecate, Appender + +from pandas.core.format import DataFrameFormatter, docstring_to_string import pandas.core.nanops as nanops import pandas.core.common as com @@ -924,13 +926,14 @@ def to_csv(self, path, sep=",", na_rep='', cols=None, header=True, f.close() - def to_string(self, buf=None, columns=None, colSpace=None, - na_rep='NaN', formatters=None, float_format=None, - sparsify=True, nanRep=None, index_names=True): + @Appender(docstring_to_string, indents=1) + def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, + header=True, index=True, na_rep='NaN', formatters=None, + float_format=None, sparsify=True, nanRep=None, + index_names=True): """ Render a DataFrame to a console-friendly tabular output. """ - from pandas.core.format import DataFrameFormatter if nanRep is not None: # pragma: no cover import warnings @@ -938,28 +941,41 @@ def to_string(self, buf=None, columns=None, colSpace=None, FutureWarning) na_rep = nanRep + if colSpace is not None: # pragma: no cover + import warnings + warnings.warn("colSpace is deprecated, use col_space", + FutureWarning) + col_space = colSpace formatter = DataFrameFormatter(self, buf=buf, columns=columns, - col_space=colSpace, na_rep=na_rep, + col_space=col_space, na_rep=na_rep, formatters=formatters, float_format=float_format, sparsify=sparsify, - index_names=index_names) + index_names=index_names, + header=header, index=index) formatter.to_string() if buf is None: return formatter.buf.getvalue() - def to_html(self, buf=None, columns=None, colSpace=None, - na_rep='NaN', formatters=None, float_format=None, - sparsify=True, index_names=True): + @Appender(docstring_to_string, indents=1) + def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, + header=True, index=True, na_rep='NaN', 
formatters=None, + float_format=None, sparsify=True, index_names=True): """ - Render a DataFrame to a html table. + Render a DataFrame to an html table. """ - from pandas.core.format import DataFrameFormatter + + if colSpace is not None: # pragma: no cover + import warnings + warnings.warn("colSpace is deprecated, use col_space", + FutureWarning) + col_space = colSpace formatter = DataFrameFormatter(self, buf=buf, columns=columns, - col_space=colSpace, na_rep=na_rep, + col_space=col_space, na_rep=na_rep, + header=header, index=index, formatters=formatters, float_format=float_format, sparsify=sparsify, diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index be738cc42b2a6..0caa525e9912c 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1672,7 +1672,7 @@ def test_to_string(self): # big mixed biggie = DataFrame({'A' : randn(1000), - 'B' : tm.makeStringIndex(1000)}, + 'B' : tm.makeStringIndex(1000)}, index=range(1000)) biggie['A'][:20] = nan @@ -1716,6 +1716,15 @@ def test_to_string(self): frame = DataFrame(index=np.arange(1000)) frame.to_string() + def test_to_string_no_header(self): + df = DataFrame({'x' : [1, 2, 3], + 'y' : [4, 5, 6]}) + + df_s = df.to_string(header=False) + expected = "0 1 4\n1 2 5\n2 3 6" + + assert(df_s == expected) + def test_to_html(self): # big mixed biggie = DataFrame({'A' : randn(1000), diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index cbca3763de0b0..8512d38d9cd93 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -9,3 +9,94 @@ def wrapper(*args, **kwargs): return alternative(*args, **kwargs) return wrapper +# Substitution and Appender are derived from matplotlib.docstring (1.1.0) +# module http://matplotlib.sourceforge.net/users/license.html + +class Substitution(object): + """ + A decorator to take a function's docstring and perform string + substitution on it. 
+ + This decorator should be robust even if func.__doc__ is None + (for example, if -OO was passed to the interpreter) + + Usage: construct a docstring.Substitution with a sequence or + dictionary suitable for performing substitution; then + decorate a suitable function with the constructed object. e.g. + + sub_author_name = Substitution(author='Jason') + + @sub_author_name + def some_function(x): + "%(author)s wrote this function" + + # note that some_function.__doc__ is now "Jason wrote this function" + + One can also use positional arguments. + + sub_first_last_names = Substitution('Edgar Allen', 'Poe') + + @sub_first_last_names + def some_function(x): + "%s %s wrote the Raven" + """ + def __init__(self, *args, **kwargs): + assert not (args and kwargs), "Only positional or keyword args are allowed" + self.params = args or kwargs + + def __call__(self, func): + func.__doc__ = func.__doc__ and func.__doc__ % self.params + return func + + def update(self, *args, **kwargs): + "Assume self.params is a dict and update it with supplied args" + self.params.update(*args, **kwargs) + + @classmethod + def from_params(cls, params): + """ + In the case where the params is a mutable sequence (list or dictionary) + and it may change before this class is called, one may explicitly use a + reference to the params rather than using *args or **kwargs which will + copy the values and not reference them. + """ + result = cls() + result.params = params + return result + +class Appender(object): + """ + A function decorator that will append an addendum to the docstring + of the target function. + + This decorator should be robust even if func.__doc__ is None + (for example, if -OO was passed to the interpreter). + + Usage: construct a docstring.Appender with a string to be joined to + the original docstring. An optional 'join' parameter may be supplied + which will be used to join the docstring and addendum. e.g. 
+ + add_copyright = Appender("Copyright (c) 2009", join='\n') + + @add_copyright + def my_dog(has='fleas'): + "This docstring will have a copyright below" + pass + """ + def __init__(self, addendum, join='', indents=0): + if indents > 0: + self.addendum = indent(addendum, indents=indents) + else: + self.addendum = addendum + self.join = join + + def __call__(self, func): + docitems = [func.__doc__, self.addendum] + func.__doc__ = func.__doc__ and ''.join(docitems) + return func + +def indent(text, indents=1): + if not text or type(text) != str: + return '' + jointext = ''.join(['\n'] + [' '] * indents) + return jointext.join(text.split('\n'))