diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index cba21ce7ee1e6..316fc21c126ac 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -51,7 +51,7 @@ Bug Fixes **I/O** -- +- Bug in Python 3 when reading an HDF5 table-format ``DataFrame`` created in Python 2 (:issue:`24925`) - - diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4e103482f48a2..2ab6ddb5b25c7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3288,7 +3288,7 @@ def get_attrs(self): self.nan_rep = getattr(self.attrs, 'nan_rep', None) self.encoding = _ensure_encoding( getattr(self.attrs, 'encoding', None)) - self.errors = getattr(self.attrs, 'errors', 'strict') + self.errors = _ensure_decoded(getattr(self.attrs, 'errors', 'strict')) self.levels = getattr( self.attrs, 'levels', None) or [] self.index_axes = [ diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5 new file mode 100644 index 0000000000000..3863d714a315b Binary files /dev/null and b/pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5 differ diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 517a3e059469c..9430011288f27 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4540,7 +4540,7 @@ def test_pytables_native2_read(self, datapath): def test_legacy_table_fixed_format_read_py2(self, datapath): # GH 24510 - # legacy table with fixed format written en Python 2 + # legacy table with fixed format written in Python 2 with ensure_clean_store( datapath('io', 'data', 'legacy_hdf', 'legacy_table_fixed_py2.h5'), @@ -4552,6 +4552,21 @@ def test_legacy_table_fixed_format_read_py2(self, datapath): name='INDEX_NAME')) assert_frame_equal(expected, result) + def test_legacy_table_read_py2(self, datapath): + # GH 24925 + # legacy table written in Python 2 + with ensure_clean_store( + datapath('io', 'data', 'legacy_hdf', 
'legacy_table_py2.h5'), + mode='r') as store: + result = store.select('table') + + expected = pd.DataFrame({ + "a": ["a", "b"], + "b": [2, 3] + }) + assert_frame_equal(expected, result) + def test_legacy_table_read(self, datapath): # legacy table types with ensure_clean_store(