Skip to content

Commit

Permalink
BUG: output formatting with to_html(), index=False and/or index_names…
Browse files Browse the repository at this point in the history
  • Loading branch information
simonjayhawkins authored and Pingviinituutti committed Feb 28, 2019
1 parent 46eced8 commit f95258e
Show file tree
Hide file tree
Showing 84 changed files with 2,287 additions and 24 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1596,6 +1596,8 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
- :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`)
- Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`)
- Bug in :func:`to_html()` with ``index=False`` misses truncation indicators (...) on truncated DataFrame (:issue:`15019`, :issue:`22783`)
- Bug in :func:`to_html()` with ``index=False`` where the column headers were offset from the data when both the columns and the row index are ``MultiIndex`` (:issue:`22579`)
- Bug in :func:`to_html()` with ``index_names=False`` displaying index name (:issue:`22747`)
- Bug in :func:`DataFrame.to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`)
- Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`)
- Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`).
Expand Down
100 changes: 79 additions & 21 deletions pandas/io/formats/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,35 @@ def __init__(self, formatter, classes=None, notebook=False, border=None,
self.table_id = table_id
self.render_links = render_links

@property
def show_col_idx_names(self):
    """
    Whether the names of the columns index are to be displayed.

    Returns
    -------
    bool
        True only if the formatter's ``has_column_names``,
        ``show_index_names`` and ``header`` settings are all truthy.
    """
    # see gh-22579
    # Column misalignment also occurs for
    # a standard index when the columns index is named.
    # Determine if ANY column names need to be displayed,
    # since if the row index is not displayed a column of
    # blank cells needs to be included before the DataFrame values.
    # TODO: refactor to add show_col_idx_names property to
    # DataFrameFormatter
    return all((self.fmt.has_column_names,
                self.fmt.show_index_names,
                self.fmt.header))

@property
def row_levels(self):
    """
    Number of leading cells that precede the data values in each row.

    Returns
    -------
    int
        ``self.frame.index.nlevels`` when the row index is shown;
        1 when the row index is hidden but the columns index names are
        shown (a single column of blank cells keeps the header and the
        data aligned); 0 otherwise.
    """
    if self.fmt.index:
        # showing (row) index
        return self.frame.index.nlevels
    elif self.show_col_idx_names:
        # see gh-22579
        # Column misalignment also occurs for
        # a standard index when the columns index is named.
        # If the row index is not displayed a column of
        # blank cells needs to be included before the DataFrame values.
        return 1
    # not showing (row) index
    return 0

@property
def is_truncated(self):
    """Return the ``is_truncated`` value of the underlying formatter."""
    return self.fmt.is_truncated
Expand Down Expand Up @@ -201,7 +230,7 @@ def write_result(self, buf):

def _write_header(self, indent):
truncate_h = self.fmt.truncate_h
row_levels = self.frame.index.nlevels

if not self.fmt.header:
# write nothing
return indent
Expand Down Expand Up @@ -267,12 +296,26 @@ def _write_header(self, indent):
values = (values[:ins_col] + [u('...')] +
values[ins_col:])

name = self.columns.names[lnum]
row = [''] * (row_levels - 1) + ['' if name is None else
pprint_thing(name)]

if row == [""] and self.fmt.index is False:
row = []
# see gh-22579
# Column Offset Bug with to_html(index=False) with
# MultiIndex Columns and Index.
# Initially fill row with blank cells before column names.
# TODO: Refactor to remove code duplication with code
# block below for standard columns index.
row = [''] * (self.row_levels - 1)
if self.fmt.index or self.show_col_idx_names:
# see gh-22747
# If to_html(index_names=False) do not show columns
# index names.
# TODO: Refactor to use _get_column_name_list from
# DataFrameFormatter class and create a
# _get_formatted_column_labels function for code
# parity with DataFrameFormatter class.
if self.fmt.show_index_names:
name = self.columns.names[lnum]
row.append(pprint_thing(name or ''))
else:
row.append('')

tags = {}
j = len(row)
Expand All @@ -287,18 +330,28 @@ def _write_header(self, indent):
self.write_tr(row, indent, self.indent_delta, tags=tags,
header=True)
else:
if self.fmt.index:
row = [''] * (self.frame.index.nlevels - 1)
row.append(self.columns.name or '')
else:
row = []
# see gh-22579
# Column misalignment also occurs for
# a standard index when the columns index is named.
# Initially fill row with blank cells before column names.
# TODO: Refactor to remove code duplication with code block
# above for columns MultiIndex.
row = [''] * (self.row_levels - 1)
if self.fmt.index or self.show_col_idx_names:
# see gh-22747
# If to_html(index_names=False) do not show columns
# index names.
# TODO: Refactor to use _get_column_name_list from
# DataFrameFormatter class.
if self.fmt.show_index_names:
row.append(self.columns.name or '')
else:
row.append('')
row.extend(self.columns)
align = self.fmt.justify

if truncate_h:
if not self.fmt.index:
row_levels = 0
ins_col = row_levels + self.fmt.tr_col_num
ins_col = self.row_levels + self.fmt.tr_col_num
row.insert(ins_col, '...')

self.write_tr(row, indent, self.indent_delta, header=True,
Expand Down Expand Up @@ -346,28 +399,31 @@ def _write_regular_rows(self, fmt_values, indent):
index_values = self.fmt.tr_frame.index.map(fmt)
else:
index_values = self.fmt.tr_frame.index.format()
row_levels = 1
else:
row_levels = 0

row = []
for i in range(nrows):

if truncate_v and i == (self.fmt.tr_row_num):
str_sep_row = ['...'] * len(row)
self.write_tr(str_sep_row, indent, self.indent_delta,
tags=None, nindex_levels=row_levels)
tags=None, nindex_levels=self.row_levels)

row = []
if self.fmt.index:
row.append(index_values[i])
# see gh-22579
# Column misalignment also occurs for
# a standard index when the columns index is named.
# Add blank cell before data cells.
elif self.show_col_idx_names:
row.append('')
row.extend(fmt_values[j][i] for j in range(self.ncols))

if truncate_h:
dot_col_ix = self.fmt.tr_col_num + row_levels
dot_col_ix = self.fmt.tr_col_num + self.row_levels
row.insert(dot_col_ix, '...')
self.write_tr(row, indent, self.indent_delta, tags=None,
nindex_levels=row_levels)
nindex_levels=self.row_levels)

def _write_hierarchical_rows(self, fmt_values, indent):
template = 'rowspan="{span}" valign="top"'
Expand All @@ -376,6 +432,8 @@ def _write_hierarchical_rows(self, fmt_values, indent):
truncate_v = self.fmt.truncate_v
frame = self.fmt.tr_frame
nrows = len(frame)
# TODO: after gh-22887 fixed, refactor to use class property
# in place of row_levels
row_levels = self.frame.index.nlevels

idx_values = frame.index.format(sparsify=False, adjoin=False,
Expand Down
File renamed without changes.
76 changes: 76 additions & 0 deletions pandas/tests/io/formats/data/html/gh22579_expected_output.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
<table border="1" class="dataframe">
<thead>
<tr>
<th colspan="2" halign="left">a</th>
<th colspan="2" halign="left">b</th>
</tr>
<tr>
<th>c</th>
<th>d</th>
<th>c</th>
<th>d</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>10</td>
<td>10</td>
<td>10</td>
</tr>
<tr>
<td>1</td>
<td>11</td>
<td>11</td>
<td>11</td>
</tr>
<tr>
<td>2</td>
<td>12</td>
<td>12</td>
<td>12</td>
</tr>
<tr>
<td>3</td>
<td>13</td>
<td>13</td>
<td>13</td>
</tr>
<tr>
<td>4</td>
<td>14</td>
<td>14</td>
<td>14</td>
</tr>
<tr>
<td>5</td>
<td>15</td>
<td>15</td>
<td>15</td>
</tr>
<tr>
<td>6</td>
<td>16</td>
<td>16</td>
<td>16</td>
</tr>
<tr>
<td>7</td>
<td>17</td>
<td>17</td>
<td>17</td>
</tr>
<tr>
<td>8</td>
<td>18</td>
<td>18</td>
<td>18</td>
</tr>
<tr>
<td>9</td>
<td>19</td>
<td>19</td>
<td>19</td>
</tr>
</tbody>
</table>
30 changes: 30 additions & 0 deletions pandas/tests/io/formats/data/html/gh22783_named_columns_index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>columns.name</th>
<th>0</th>
<th>1</th>
<th>...</th>
<th>3</th>
<th>4</th>
</tr>
</thead>
<tbody>
<tr>
<th></th>
<td>1.764052</td>
<td>0.400157</td>
<td>...</td>
<td>2.240893</td>
<td>1.867558</td>
</tr>
<tr>
<th></th>
<td>-0.977278</td>
<td>0.950088</td>
<td>...</td>
<td>-0.103219</td>
<td>0.410599</td>
</tr>
</tbody>
</table>
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<table border="1" class="dataframe">
<thead>
<tr>
<th></th>
<th>columns.name.0</th>
<th colspan="2" halign="left">a</th>
</tr>
<tr>
<th></th>
<th>columns.name.1</th>
<th>b</th>
<th>c</th>
</tr>
<tr>
<th>index.name.0</th>
<th>index.name.1</th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th rowspan="2" valign="top">a</th>
<th>b</th>
<td>0</td>
<td>0</td>
</tr>
<tr>
<th>c</th>
<td>0</td>
<td>0</td>
</tr>
</tbody>
</table>
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>columns.name</th>
<th>0</th>
<th>1</th>
</tr>
<tr>
<th>index.name.0</th>
<th>index.name.1</th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th rowspan="2" valign="top">a</th>
<th>b</th>
<td>0</td>
<td>0</td>
</tr>
<tr>
<th>c</th>
<td>0</td>
<td>0</td>
</tr>
</tbody>
</table>
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<table border="1" class="dataframe">
<thead>
<tr>
<th></th>
<th></th>
<th colspan="2" halign="left">a</th>
</tr>
<tr>
<th></th>
<th></th>
<th>b</th>
<th>c</th>
</tr>
<tr>
<th>index.name.0</th>
<th>index.name.1</th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th rowspan="2" valign="top">a</th>
<th>b</th>
<td>0</td>
<td>0</td>
</tr>
<tr>
<th>c</th>
<td>0</td>
<td>0</td>
</tr>
</tbody>
</table>
Loading

0 comments on commit f95258e

Please sign in to comment.