BUG: output formatting with to_html(), index=False and/or index_names=False (pandas-dev#22579, pandas-dev#22747) (pandas-dev#22655)
Pingviinituutti · Feb 28, 2019 · f95258e · f95258e
1 parent 46eced8
commit f95258e
Show file tree

Hide file tree

Showing 84 changed files with 2,287 additions and 24 deletions.
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1596,6 +1596,8 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`)
 - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`)
 - Bug in :func:`to_html()` with ``index=False`` misses truncation indicators (...) on truncated DataFrame (:issue:`15019`, :issue:`22783`)
+- Bug in :func:`to_html()` with ``index=False`` when both columns and row index are ``MultiIndex`` (:issue:`22579`)
+- Bug in :func:`to_html()` with ``index_names=False`` displaying index name (:issue:`22747`)
 - Bug in :func:`DataFrame.to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`)
 - Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`)
 - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`).

diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py
@@ -43,6 +43,35 @@ def __init__(self, formatter, classes=None, notebook=False, border=None,
         self.table_id = table_id
         self.render_links = render_links
 
+    @property
+    def show_col_idx_names(self):
+        # see gh-22579
+        # Column misalignment also occurs for
+        # a standard index when the columns index is named.
+        # Determine if ANY column names need to be displayed
+        # since, if the row index is not displayed, a column of
+        # blank cells needs to be included before the DataFrame values.
+        # TODO: refactor to add show_col_idx_names property to
+        # DataFrameFormatter
+        return all((self.fmt.has_column_names,
+                    self.fmt.show_index_names,
+                    self.fmt.header))
+
+    @property
+    def row_levels(self):
+        if self.fmt.index:
+            # showing (row) index
+            return self.frame.index.nlevels
+        elif self.show_col_idx_names:
+            # see gh-22579
+            # Column misalignment also occurs for
+            # a standard index when the columns index is named.
+            # If the row index is not displayed, a column of
+            # blank cells needs to be included before the DataFrame values.
+            return 1
+        # not showing (row) index
+        return 0
+
     @property
     def is_truncated(self):
         return self.fmt.is_truncated
@@ -201,7 +230,7 @@ def write_result(self, buf):
 
     def _write_header(self, indent):
         truncate_h = self.fmt.truncate_h
-        row_levels = self.frame.index.nlevels
+
         if not self.fmt.header:
             # write nothing
             return indent
@@ -267,12 +296,26 @@ def _write_header(self, indent):
                         values = (values[:ins_col] + [u('...')] +
                                   values[ins_col:])
 
-                name = self.columns.names[lnum]
-                row = [''] * (row_levels - 1) + ['' if name is None else
-                                                 pprint_thing(name)]
-
-                if row == [""] and self.fmt.index is False:
-                    row = []
+                # see gh-22579
+                # Column Offset Bug with to_html(index=False) with
+                # MultiIndex Columns and Index.
+                # Initially fill row with blank cells before column names.
+                # TODO: Refactor to remove code duplication with code
+                # block below for standard columns index.
+                row = [''] * (self.row_levels - 1)
+                if self.fmt.index or self.show_col_idx_names:
+                    # see gh-22747
+                    # If to_html(index_names=False) do not show columns
+                    # index names.
+                    # TODO: Refactor to use _get_column_name_list from
+                    # DataFrameFormatter class and create a
+                    # _get_formatted_column_labels function for code
+                    # parity with DataFrameFormatter class.
+                    if self.fmt.show_index_names:
+                        name = self.columns.names[lnum]
+                        row.append(pprint_thing(name or ''))
+                    else:
+                        row.append('')
 
                 tags = {}
                 j = len(row)
@@ -287,18 +330,28 @@ def _write_header(self, indent):
                 self.write_tr(row, indent, self.indent_delta, tags=tags,
                               header=True)
         else:
-            if self.fmt.index:
-                row = [''] * (self.frame.index.nlevels - 1)
-                row.append(self.columns.name or '')
-            else:
-                row = []
+            # see gh-22579
+            # Column misalignment also occurs for
+            # a standard index when the columns index is named.
+            # Initially fill row with blank cells before column names.
+            # TODO: Refactor to remove code duplication with code block
+            # above for columns MultiIndex.
+            row = [''] * (self.row_levels - 1)
+            if self.fmt.index or self.show_col_idx_names:
+                # see gh-22747
+                # If to_html(index_names=False) do not show columns
+                # index names.
+                # TODO: Refactor to use _get_column_name_list from
+                # DataFrameFormatter class.
+                if self.fmt.show_index_names:
+                    row.append(self.columns.name or '')
+                else:
+                    row.append('')
             row.extend(self.columns)
             align = self.fmt.justify
 
             if truncate_h:
-                if not self.fmt.index:
-                    row_levels = 0
-                ins_col = row_levels + self.fmt.tr_col_num
+                ins_col = self.row_levels + self.fmt.tr_col_num
                 row.insert(ins_col, '...')
 
             self.write_tr(row, indent, self.indent_delta, header=True,
@@ -346,28 +399,31 @@ def _write_regular_rows(self, fmt_values, indent):
                 index_values = self.fmt.tr_frame.index.map(fmt)
             else:
                 index_values = self.fmt.tr_frame.index.format()
-            row_levels = 1
-        else:
-            row_levels = 0
 
         row = []
         for i in range(nrows):
 
             if truncate_v and i == (self.fmt.tr_row_num):
                 str_sep_row = ['...'] * len(row)
                 self.write_tr(str_sep_row, indent, self.indent_delta,
-                              tags=None, nindex_levels=row_levels)
+                              tags=None, nindex_levels=self.row_levels)
 
             row = []
             if self.fmt.index:
                 row.append(index_values[i])
+            # see gh-22579
+            # Column misalignment also occurs for
+            # a standard index when the columns index is named.
+            # Add blank cell before data cells.
+            elif self.show_col_idx_names:
+                row.append('')
             row.extend(fmt_values[j][i] for j in range(self.ncols))
 
             if truncate_h:
-                dot_col_ix = self.fmt.tr_col_num + row_levels
+                dot_col_ix = self.fmt.tr_col_num + self.row_levels
                 row.insert(dot_col_ix, '...')
             self.write_tr(row, indent, self.indent_delta, tags=None,
-                          nindex_levels=row_levels)
+                          nindex_levels=self.row_levels)
 
     def _write_hierarchical_rows(self, fmt_values, indent):
         template = 'rowspan="{span}" valign="top"'
@@ -376,6 +432,8 @@ def _write_hierarchical_rows(self, fmt_values, indent):
         truncate_v = self.fmt.truncate_v
         frame = self.fmt.tr_frame
         nrows = len(frame)
+        # TODO: after gh-22887 is fixed, refactor to use class property
+        # in place of row_levels
         row_levels = self.frame.index.nlevels
 
         idx_values = frame.index.format(sparsify=False, adjoin=False,

diff --git a/...ormats/data/datetime64_hourformatter.html → ...s/data/html/datetime64_hourformatter.html b/...ormats/data/datetime64_hourformatter.html → ...s/data/html/datetime64_hourformatter.html
diff --git a/...rmats/data/datetime64_monthformatter.html → .../data/html/datetime64_monthformatter.html b/...rmats/data/datetime64_monthformatter.html → .../data/html/datetime64_monthformatter.html
diff --git a/...ests/io/formats/data/escape_disabled.html → ...io/formats/data/html/escape_disabled.html b/...ests/io/formats/data/escape_disabled.html → ...io/formats/data/html/escape_disabled.html
diff --git a/pandas/tests/io/formats/data/escaped.html → ...s/tests/io/formats/data/html/escaped.html b/pandas/tests/io/formats/data/escaped.html → ...s/tests/io/formats/data/html/escaped.html
diff --git a/...formats/data/gh12031_expected_output.html → ...ts/data/html/gh12031_expected_output.html b/...formats/data/gh12031_expected_output.html → ...ts/data/html/gh12031_expected_output.html
diff --git a/...rmats/data/gh14882_expected_output_1.html → .../data/html/gh14882_expected_output_1.html b/...rmats/data/gh14882_expected_output_1.html → .../data/html/gh14882_expected_output_1.html
diff --git a/...rmats/data/gh14882_expected_output_2.html → .../data/html/gh14882_expected_output_2.html b/...rmats/data/gh14882_expected_output_2.html → .../data/html/gh14882_expected_output_2.html
diff --git a/...formats/data/gh14998_expected_output.html → ...ts/data/html/gh14998_expected_output.html b/...formats/data/gh14998_expected_output.html → ...ts/data/html/gh14998_expected_output.html
diff --git a/...formats/data/gh15019_expected_output.html → ...ts/data/html/gh15019_expected_output.html b/...formats/data/gh15019_expected_output.html → ...ts/data/html/gh15019_expected_output.html
diff --git a/...formats/data/gh21625_expected_output.html → ...ts/data/html/gh21625_expected_output.html b/...formats/data/gh21625_expected_output.html → ...ts/data/html/gh21625_expected_output.html
diff --git a/...formats/data/gh22270_expected_output.html → ...ts/data/html/gh22270_expected_output.html b/...formats/data/gh22270_expected_output.html → ...ts/data/html/gh22270_expected_output.html
diff --git a/pandas/tests/io/formats/data/html/gh22579_expected_output.html b/pandas/tests/io/formats/data/html/gh22579_expected_output.html
@@ -0,0 +1,76 @@
+<table border="1" class="dataframe">
+  <thead>
+    <tr>
+      <th colspan="2" halign="left">a</th>
+      <th colspan="2" halign="left">b</th>
+    </tr>
+    <tr>
+      <th>c</th>
+      <th>d</th>
+      <th>c</th>
+      <th>d</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>0</td>
+      <td>10</td>
+      <td>10</td>
+      <td>10</td>
+    </tr>
+    <tr>
+      <td>1</td>
+      <td>11</td>
+      <td>11</td>
+      <td>11</td>
+    </tr>
+    <tr>
+      <td>2</td>
+      <td>12</td>
+      <td>12</td>
+      <td>12</td>
+    </tr>
+    <tr>
+      <td>3</td>
+      <td>13</td>
+      <td>13</td>
+      <td>13</td>
+    </tr>
+    <tr>
+      <td>4</td>
+      <td>14</td>
+      <td>14</td>
+      <td>14</td>
+    </tr>
+    <tr>
+      <td>5</td>
+      <td>15</td>
+      <td>15</td>
+      <td>15</td>
+    </tr>
+    <tr>
+      <td>6</td>
+      <td>16</td>
+      <td>16</td>
+      <td>16</td>
+    </tr>
+    <tr>
+      <td>7</td>
+      <td>17</td>
+      <td>17</td>
+      <td>17</td>
+    </tr>
+    <tr>
+      <td>8</td>
+      <td>18</td>
+      <td>18</td>
+      <td>18</td>
+    </tr>
+    <tr>
+      <td>9</td>
+      <td>19</td>
+      <td>19</td>
+      <td>19</td>
+    </tr>
+  </tbody>
+</table>
diff --git a/...formats/data/gh22783_expected_output.html → ...ts/data/html/gh22783_expected_output.html b/...formats/data/gh22783_expected_output.html → ...ts/data/html/gh22783_expected_output.html
diff --git a/pandas/tests/io/formats/data/html/gh22783_named_columns_index.html b/pandas/tests/io/formats/data/html/gh22783_named_columns_index.html
@@ -0,0 +1,30 @@
+<table border="1" class="dataframe">
+  <thead>
+    <tr style="text-align: right;">
+      <th>columns.name</th>
+      <th>0</th>
+      <th>1</th>
+      <th>...</th>
+      <th>3</th>
+      <th>4</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <th></th>
+      <td>1.764052</td>
+      <td>0.400157</td>
+      <td>...</td>
+      <td>2.240893</td>
+      <td>1.867558</td>
+    </tr>
+    <tr>
+      <th></th>
+      <td>-0.977278</td>
+      <td>0.950088</td>
+      <td>...</td>
+      <td>-0.103219</td>
+      <td>0.410599</td>
+    </tr>
+  </tbody>
+</table>
diff --git a/.../formats/data/gh6131_expected_output.html → ...ats/data/html/gh6131_expected_output.html b/.../formats/data/gh6131_expected_output.html → ...ats/data/html/gh6131_expected_output.html
diff --git a/.../formats/data/gh8452_expected_output.html → ...ats/data/html/gh8452_expected_output.html b/.../formats/data/gh8452_expected_output.html → ...ats/data/html/gh8452_expected_output.html
diff --git a/pandas/tests/io/formats/data/index_1.html → ...s/tests/io/formats/data/html/index_1.html b/pandas/tests/io/formats/data/index_1.html → ...s/tests/io/formats/data/html/index_1.html
diff --git a/pandas/tests/io/formats/data/index_2.html → ...s/tests/io/formats/data/html/index_2.html b/pandas/tests/io/formats/data/index_2.html → ...s/tests/io/formats/data/html/index_2.html
diff --git a/pandas/tests/io/formats/data/index_3.html → ...s/tests/io/formats/data/html/index_3.html b/pandas/tests/io/formats/data/index_3.html → ...s/tests/io/formats/data/html/index_3.html
diff --git a/pandas/tests/io/formats/data/index_4.html → ...s/tests/io/formats/data/html/index_4.html b/pandas/tests/io/formats/data/index_4.html → ...s/tests/io/formats/data/html/index_4.html
diff --git a/pandas/tests/io/formats/data/index_5.html → ...s/tests/io/formats/data/html/index_5.html b/pandas/tests/io/formats/data/index_5.html → ...s/tests/io/formats/data/html/index_5.html
diff --git a/...ests/io/formats/data/index_formatter.html → ...io/formats/data/html/index_formatter.html b/...ests/io/formats/data/index_formatter.html → ...io/formats/data/html/index_formatter.html
diff --git a/pandas/tests/io/formats/data/html/index_named_multi_columns_named_multi.html b/pandas/tests/io/formats/data/html/index_named_multi_columns_named_multi.html
@@ -0,0 +1,34 @@
+<table border="1" class="dataframe">
+  <thead>
+    <tr>
+      <th></th>
+      <th>columns.name.0</th>
+      <th colspan="2" halign="left">a</th>
+    </tr>
+    <tr>
+      <th></th>
+      <th>columns.name.1</th>
+      <th>b</th>
+      <th>c</th>
+    </tr>
+    <tr>
+      <th>index.name.0</th>
+      <th>index.name.1</th>
+      <th></th>
+      <th></th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <th rowspan="2" valign="top">a</th>
+      <th>b</th>
+      <td>0</td>
+      <td>0</td>
+    </tr>
+    <tr>
+      <th>c</th>
+      <td>0</td>
+      <td>0</td>
+    </tr>
+  </tbody>
+</table>
diff --git a/pandas/tests/io/formats/data/html/index_named_multi_columns_named_standard.html b/pandas/tests/io/formats/data/html/index_named_multi_columns_named_standard.html
@@ -0,0 +1,29 @@
+<table border="1" class="dataframe">
+  <thead>
+    <tr style="text-align: right;">
+      <th></th>
+      <th>columns.name</th>
+      <th>0</th>
+      <th>1</th>
+    </tr>
+    <tr>
+      <th>index.name.0</th>
+      <th>index.name.1</th>
+      <th></th>
+      <th></th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <th rowspan="2" valign="top">a</th>
+      <th>b</th>
+      <td>0</td>
+      <td>0</td>
+    </tr>
+    <tr>
+      <th>c</th>
+      <td>0</td>
+      <td>0</td>
+    </tr>
+  </tbody>
+</table>
diff --git a/pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_multi.html
@@ -0,0 +1,34 @@
+<table border="1" class="dataframe">
+  <thead>
+    <tr>
+      <th></th>
+      <th></th>
+      <th colspan="2" halign="left">a</th>
+    </tr>
+    <tr>
+      <th></th>
+      <th></th>
+      <th>b</th>
+      <th>c</th>
+    </tr>
+    <tr>
+      <th>index.name.0</th>
+      <th>index.name.1</th>
+      <th></th>
+      <th></th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <th rowspan="2" valign="top">a</th>
+      <th>b</th>
+      <td>0</td>
+      <td>0</td>
+    </tr>
+    <tr>
+      <th>c</th>
+      <td>0</td>
+      <td>0</td>
+    </tr>
+  </tbody>
+</table>