diff --git a/doc/source/io.rst b/doc/source/io.rst index 0c4e097a1ba0a..496facbe91f3e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -278,7 +278,7 @@ over the string representation of the object. All arguments are optional: - ``buf`` default None, for example a StringIO object - ``columns`` default None, which columns to write - - ``colSpace`` default None, number of spaces to write between columns + - ``col_space`` default None, number of spaces to write between columns - ``na_rep`` default ``NaN``, representation of NA value - ``formatters`` default None, a dictionary (by column) of functions each of which takes a single argument and returns a formatted string @@ -288,6 +288,8 @@ over the string representation of the object. All arguments are optional: - ``sparsify`` default True, set to False for a DataFrame with a hierarchical index to print every multiindex key at each row. - ``index_names`` default True, will print the names of the indices + - ``index`` default True, will print the index (ie, row labels) + - ``header`` default True, will print the column labels The Series object also has a ``to_string`` method, but with only the ``buf``, ``na_rep``, ``float_format`` arguments. There is also a ``length`` argument diff --git a/pandas/core/common.py b/pandas/core/common.py index 06c794bdbd550..2e6e7e233a011 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -360,8 +360,8 @@ def _try_sort(iterable): except Exception: return listed -def set_printoptions(precision=None, column_space=None, max_rows=None, - max_columns=None): +def set_printoptions(precision=None, column_space=None, max_rows=None, + max_columns=None, justify='right'): """ Alter default behavior of DataFrame.toString @@ -376,8 +376,11 @@ def set_printoptions(precision=None, column_space=None, max_rows=None, Either one, or both can be set to 0 (experimental). Pandas will figure out how big the terminal is and will not display more rows or/and columns that can fit on it. 
+ justify : string + 'right' or 'left' to justify the values of the dataframe using this + alignment """ - global _float_format, _column_space, _max_rows, _max_columns + global _float_format, _column_space, _max_rows, _max_columns, _justify if precision is not None: float_format = '%.' + '%d' % precision + 'g' _float_format = lambda x: float_format % x @@ -387,6 +390,8 @@ def set_printoptions(precision=None, column_space=None, max_rows=None, _max_rows = max_rows if max_columns is not None: _max_columns = max_columns + if justify is not None and justify in ('right', 'left'): + _justify = justify class EngFormatter(object): """ @@ -491,33 +496,11 @@ def set_eng_float_format(precision=3, use_eng_prefix=False): _float_format = EngFormatter(precision, use_eng_prefix) _column_space = max(12, precision + 9) -_float_format = lambda x: '%.4g' % x +_float_format = lambda x: '% .4f' % x _column_space = 12 _max_rows = 500 _max_columns = 0 - -def _pfixed(s, space, na_rep=None, float_format=None): - if isinstance(s, float): - if na_rep is not None and isnull(s): - if np.isnan(s): - s = na_rep - return (' %s' % s).ljust(space) - - if float_format: - formatted = float_format(s) - else: - is_neg = s < 0 - formatted = _float_format(np.abs(s)) - - if is_neg: - formatted = '-' + formatted - else: - formatted = ' ' + formatted - - return formatted.ljust(space) - else: - stringified = _stringify(s) - return (' %s' % stringified)[:space].ljust(space) +_justify = 'right' def _stringify(col): # unicode workaround @@ -526,29 +509,31 @@ def _stringify(col): else: return '%s' % col -def _format(s, na_rep=None, float_format=None): +def _format(s, space=None, na_rep=None, float_format=None): + def _just_help(x): + if space is None: + return x + if _justify == 'right': + return x[:space].rjust(space) + else: + return x[:space].ljust(space) + if isinstance(s, float): if na_rep is not None and isnull(s): if np.isnan(s): s = na_rep - return ' %s' % s + return _just_help('%s' % s) if float_format: 
formatted = float_format(s) else: - is_neg = s < 0 - formatted = _float_format(np.abs(s)) - - if is_neg: - formatted = '-' + formatted - else: - formatted = ' ' + formatted + formatted = _float_format(s) - return formatted + return _just_help(formatted) else: - return ' %s' % _stringify(s) + return _just_help('%s' % _stringify(s)) -#------------------------------------------------------------------------------- +#------------------------------------------------------------------------------ # miscellaneous python tools def rands(n): @@ -564,14 +549,22 @@ def adjoin(space, *lists): """ outLines = [] newLists = [] - lengths = [max(map(len, x)) + space for x in lists[:-1]] - # not the last one - lengths.append(max(map(len, lists[-1]))) + if _justify == 'right': + # everyone but the first one, add space (right-aligned) + lengths = [max(map(len, x)) + space for x in lists[1:]] + lengths.insert(0, max(map(len, lists[0]))) + else: + # everyone but the last one, add space (left-aligned) + lengths = [max(map(len, x)) + space for x in lists[:-1]] + lengths.append(max(map(len, lists[-1]))) maxLen = max(map(len, lists)) for i, lst in enumerate(lists): - nl = [x.ljust(lengths[i]) for x in lst] + if _justify == 'right': + nl = [x.rjust(lengths[i]) for x in lst] + else: + nl = [x.ljust(lengths[i]) for x in lst] nl.extend([' ' * lengths[i]] * (maxLen - len(lst))) newLists.append(nl) toJoin = zip(*newLists) @@ -691,6 +684,9 @@ def is_integer_dtype(arr): def is_float_dtype(arr): return issubclass(arr.dtype.type, np.floating) +def is_numeric_dtype(arr): + return is_integer_dtype(arr) or is_float_dtype(arr) + def save(obj, path): """ Pickle (serialize) object to input file path diff --git a/pandas/core/format.py b/pandas/core/format.py index 0b1905a91e3dc..bf058c91b9b6c 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1,19 +1,50 @@ from StringIO import StringIO -from pandas.core.common import adjoin, _pfixed +from pandas.core.common import adjoin, is_numeric_dtype 
from pandas.core.index import MultiIndex, _ensure_index +docstring_to_string = """ + Parameters + ---------- + frame : DataFrame + object to render + buf : StringIO-like, optional + buffer to write to + columns : sequence, optional + the subset of columns to write; default None writes all columns + col_space : int, optional + the width of each column + header : bool, optional + whether to print column labels, default True + index : bool, optional + whether to print index (row) labels, default True + na_rep : string, optional + string representation of NaN to use, default 'NaN' + formatters : list or dict of one-parameter functions, optional + formatter functions to apply to columns' elements by position or name, + default None + float_format : one-parameter function, optional + formatter function to apply to columns' elements if they are floats + default None + sparsify : bool, optional + Set to False for a DataFrame with a hierarchical index to print every + multiindex key at each row, default True + index_names : bool, optional + Prints the names of the indexes, default True """ class DataFrameFormatter(object): """ Render a DataFrame self.to_string() : console-friendly tabular output - self.to_html() : html table + self.to_html() : html table + """ - def __init__(self, frame, buf=None, columns=None, col_space=None, - na_rep='NaN', formatters=None, float_format=None, - sparsify=True, index_names=True): + __doc__ += docstring_to_string + + def __init__(self, frame, buf=None, columns=None, col_space=None, + header=True, index=True, na_rep='NaN', formatters=None, + float_format=None, sparsify=True, index_names=True): self.frame = frame self.buf = buf if buf is not None else StringIO() self.show_index_names = index_names @@ -22,6 +53,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self.formatters = formatters self.na_rep = na_rep self.col_space = col_space + self.header = header + self.index = index if columns is not None: self.columns =
_ensure_index(columns) @@ -47,10 +80,16 @@ def to_string(self): str_index = self._get_formatted_index() str_columns = self._get_formatted_column_labels() - stringified = [str_columns[i] + format_col(c) - for i, c in enumerate(self.columns)] + if self.header: + stringified = [str_columns[i] + format_col(c) + for i, c in enumerate(self.columns)] + else: + stringified = [format_col(c) for c in self.columns] - to_write.append(adjoin(1, str_index, *stringified)) + if self.index: + to_write.append(adjoin(1, str_index, *stringified)) + else: + to_write.append(adjoin(1, *stringified)) for s in to_write: if isinstance(s, unicode): @@ -114,17 +153,21 @@ def _column_header(): write(buf, '', indent + indent_delta) else: indent += indent_delta - write(buf, '', indent) - row = [] # header row - col_row = _column_header() - indent += indent_delta - write_tr(buf, col_row, indent, indent_delta, header=True) - if self.has_index_names: - row = frame.index.names + [''] * len(frame.columns) - write_tr(buf, row, indent, indent_delta, header=True) - write(buf, '', indent) + if self.header: + write(buf, '', indent) + row = [] + + col_row = _column_header() + indent += indent_delta + write_tr(buf, col_row, indent, indent_delta, header=True) + if self.has_index_names: + row = frame.index.names + [''] * len(frame.columns) + write_tr(buf, row, indent, indent_delta, header=True) + + write(buf, '', indent) + write(buf, '
', indent) # write values @@ -148,14 +191,9 @@ def _get_column_formatter(self): col_space = self.col_space - if col_space is None: - def _myformat(v): - return _format(v, na_rep=self.na_rep, - float_format=self.float_format) - else: - def _myformat(v): - return _pfixed(v, col_space, na_rep=self.na_rep, - float_format=self.float_format) + def _myformat(v): + return _format(v, space=col_space, na_rep=self.na_rep, + float_format=self.float_format) formatters = {} if self.formatters is None else self.formatters @@ -171,16 +209,24 @@ def _format_col(col, i=None): def _get_formatted_column_labels(self): from pandas.core.index import _sparsify + formatters = self.formatters + if formatters is None: + formatters = {} + if isinstance(self.columns, MultiIndex): fmt_columns = self.columns.format(sparsify=False, adjoin=False) - str_columns = zip(*[[' %s' % y for y in x] + str_columns = zip(*[[' %s' % y if y not in formatters and is_numeric_dtype(self.frame[x]) + else str(y) + for y in x] for x in zip(*fmt_columns)]) if self.sparsify: str_columns = _sparsify(str_columns) str_columns = [list(x) for x in zip(*str_columns)] else: - str_columns = [[' %s' % x] for x in self.columns.format()] + str_columns = [[' %s' % x if x not in formatters and is_numeric_dtype(self.frame[x]) + else str(x)] + for x in self.columns.format()] if self.show_index_names and self.has_index_names: for x in str_columns: @@ -201,7 +247,7 @@ def _get_formatted_index(self): columns = self.frame.columns show_index_names = self.show_index_names and self.has_index_names - show_col_names = self.show_index_names and self.has_column_names + show_col_names = (self.show_index_names and self.has_column_names) if isinstance(index, MultiIndex): fmt_index = index.format(sparsify=self.sparsify, adjoin=False, @@ -213,11 +259,14 @@ def _get_formatted_index(self): # empty space for columns if show_col_names: - col_header = [' %s' % x for x in self._get_column_name_list()] + col_header = ['%s' % x for x in 
self._get_column_name_list()] else: col_header = [''] * columns.nlevels - return col_header + adjoined + if self.header: + return col_header + adjoined + else: + return adjoined def _get_column_name_list(self): names = [] @@ -229,7 +278,6 @@ def _get_column_name_list(self): names.append('' if columns.name is None else columns.name) return names - def single_column_table(column): table = '