diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 55da3812573c1..3ffc3c12852c0 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -38,6 +38,7 @@ Bug Fixes ~~~~~~~~~ - Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`) - Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) +- Fixed Python 3 compatibility when reading a ``DataFrame`` with a ``PeriodIndex`` from a ``format='fixed'`` ``HDFStore`` file that was written in Python 2 (:issue:`16781`) Conversion diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1b944936ec3e7..83c5e2278d339 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2584,8 +2584,8 @@ def read_index_node(self, node, start=None, stop=None): if 'name' in node._v_attrs: name = _ensure_str(node._v_attrs.name) - index_class = self._alias_to_class(getattr(node._v_attrs, - 'index_class', '')) + index_class = self._alias_to_class(_ensure_decoded( + getattr(node._v_attrs, 'index_class', ''))) factory = self._get_index_factory(index_class) kwargs = {} diff --git a/pandas/tests/io/data/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 b/pandas/tests/io/data/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 new file mode 100644 index 0000000000000..6fb92d3c564bd Binary files /dev/null and b/pandas/tests/io/data/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 differ diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 1cdb2c058c9be..43084ac589078 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5207,6 +5207,26 @@ def test_fspath(self): with pd.HDFStore(path) as store: assert os.fspath(store) == str(path) + def test_read_py2_hdf_file_in_py3(self): + # GH 16781 + + # tests reading a PeriodIndex DataFrame written in Python2 in Python3 + + # the file was generated in Python 2.7 like 
so: + # + # df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex( + # ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) + # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p') + + expected = pd.DataFrame([1., 2, 3], index=pd.PeriodIndex( + ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) + + with ensure_clean_store( + tm.get_data_path('periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), + mode='r') as store: + result = store['p'] + assert_frame_equal(result, expected) + class TestHDFComplexValues(Base): # GH10447