diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index d3bb28c2aee65..49d16a7b5290f 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -495,6 +495,7 @@ Other Enhancements
- :meth:`DataFrame.to_sql` now performs a multivalue insert if the underlying connection supports it rather than inserting row by row.
``SQLAlchemy`` dialects supporting multivalue inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`)
- :func:`read_html` now accepts a ``displayed_only`` keyword argument to control whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`)
+- :func:`read_html` now reads all ``<tbody>`` elements in a ``<table>``, not just the first. (:issue:`20690`)
- :meth:`~pandas.core.window.Rolling.quantile` and :meth:`~pandas.core.window.Expanding.quantile` now accept the ``interpolation`` keyword, ``linear`` by default (:issue:`20497`)
- zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`)
- :class:`pandas.tseries.api.offsets.WeekOfMonth` constructor now supports ``n=0`` (:issue:`20517`).
diff --git a/pandas/io/html.py b/pandas/io/html.py
index ba5da1b4e3a76..8fd876e85889f 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -324,7 +324,7 @@ def _parse_thead(self, table):
raise com.AbstractMethodError(self)
def _parse_tbody(self, table):
- """Return the body of the table.
+ """Return the list of tbody elements from the parsed table element.
Parameters
----------
@@ -333,8 +333,8 @@ def _parse_tbody(self, table):
Returns
-------
- tbody : node-like
- A <tbody>...</tbody> element.
+ tbodys : list of node-like
+ A list of <tbody>...</tbody> elements
"""
raise com.AbstractMethodError(self)
@@ -388,13 +388,17 @@ def _parse_raw_tfoot(self, table):
np.array(res).squeeze()) if res and len(res) == 1 else res
def _parse_raw_tbody(self, table):
- tbody = self._parse_tbody(table)
+ tbodies = self._parse_tbody(table)
- try:
- res = self._parse_tr(tbody[0])
- except IndexError:
- res = self._parse_tr(table)
- return self._parse_raw_data(res)
+ raw_data = []
+
+ if tbodies:
+ for tbody in tbodies:
+ raw_data.extend(self._parse_tr(tbody))
+ else:
+ raw_data.extend(self._parse_tr(table))
+
+ return self._parse_raw_data(raw_data)
def _handle_hidden_tables(self, tbl_list, attr_name):
"""Returns list of tables, potentially removing hidden elements
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 078b5f8448d46..a56946b82b027 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -396,6 +396,33 @@ def test_empty_tables(self):
res2 = self.read_html(StringIO(data2))
assert_framelist_equal(res1, res2)
+    def test_multiple_tbody(self):
+        # GH-20690
+        # Rows from every <tbody> of a single table must be parsed, not only the first.
+        data = '''<table>
+            <thead>
+                <tr>
+                    <th>A</th>
+                    <th>B</th>
+                </tr>
+            </thead>
+            <tbody>
+                <tr>
+                    <td>1</td>
+                    <td>2</td>
+                </tr>
+            </tbody>
+            <tbody>
+                <tr>
+                    <td>3</td>
+                    <td>4</td>
+                </tr>
+            </tbody>
+        </table>'''
+        expected = DataFrame({'A': [1, 3], 'B': [2, 4]})
+        result = self.read_html(StringIO(data))[0]
+        tm.assert_frame_equal(result, expected)
+
def test_header_and_one_column(self):
"""
Don't fail with bs4 when there is a header and only one column