Skip to content

Commit

Permalink
BUG: Fix read of py3 PeriodIndex DataFrame HDF made in py2 (#16781)
Browse files Browse the repository at this point in the history
In Python3, reading a DataFrame with a PeriodIndex from an HDF file
created in Python2 would incorrectly return a DataFrame with an
Int64Index.
  • Loading branch information
forbdonut committed Jun 29, 2017
1 parent 664348c commit 7cb4ba4
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 3 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Performance Improvements
Bug Fixes
~~~~~~~~~
- Fixed issue where a DataFrame scatter plot incorrectly reported a column key as not found when categorical data was used for plotting (:issue:`16199`)

- Fixed issue with loading a DataFrame with a ``PeriodIndex`` in Python3 that was written in Python2 where the index would come back as an ``Int64Index`` (:issue:`16781`)



Expand Down
4 changes: 2 additions & 2 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2591,8 +2591,8 @@ def read_index_node(self, node, start=None, stop=None):
if 'name' in node._v_attrs:
name = _ensure_str(node._v_attrs.name)

index_class = self._alias_to_class(getattr(node._v_attrs,
'index_class', ''))
index_class = self._alias_to_class(_ensure_decoded(
getattr(node._v_attrs, 'index_class', '')))
factory = self._get_index_factory(index_class)

kwargs = {}
Expand Down
Binary file not shown.
20 changes: 20 additions & 0 deletions pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5264,6 +5264,26 @@ def test_fspath(self):
with pd.HDFStore(path) as store:
assert os.fspath(store) == str(path)

def test_read_py2_hdf_file_in_py3(self):
    # GH 16781
    #
    # Regression check: a DataFrame carrying a PeriodIndex that was
    # written to HDF under Python2 must come back with the same
    # PeriodIndex when the file is read under Python3.
    #
    # The stored fixture was produced in Python 2.7 with:
    #
    #   df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex(
    #       ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
    #   df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')

    # Rebuild the frame the fixture is documented to contain.
    period_index = pd.PeriodIndex(
        ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')
    expected = pd.DataFrame([1., 2, 3], index=period_index)

    fixture_path = tm.get_data_path(
        'periodindex_0.20.1_x86_64_darwin_2.7.13.h5')

    # Open the fixture in mode 'r' and compare what is stored under
    # key 'p' with the expected frame.
    with ensure_clean_store(fixture_path, mode='r') as store:
        result = store['p']
        assert_frame_equal(result, expected)


class TestHDFComplexValues(Base):
# GH10447
Expand Down

0 comments on commit 7cb4ba4

Please sign in to comment.