Skip to content

Commit

Permalink
MAINT: Drop has_index_names input from read_excel (#16522)
Browse files Browse the repository at this point in the history
  • Loading branch information
gfyoung authored and jreback committed Jun 1, 2017
1 parent fc4408b commit db419bf
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 57 deletions.
5 changes: 0 additions & 5 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2739,11 +2739,6 @@ should be passed to ``index_col`` and ``header``
import os
os.remove('path_to_file.xlsx')
.. warning::

Excel files saved in version 0.16.2 or prior that had index names can still be read in,
but the ``has_index_names`` argument must be set to ``True``.

Parsing Specific Columns
++++++++++++++++++++++++
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ Deprecations
Removal of prior version deprecations/changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- ``pd.read_excel()`` has dropped the ``has_index_names`` parameter (:issue:`10967`)


.. _whatsnew_0210.performance:
Expand Down
40 changes: 13 additions & 27 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,6 @@
convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
data will be read in as floats: Excel stores all numbers as floats
internally
has_index_names : boolean, default None
DEPRECATED: for version 0.17+ index names will be automatically
inferred based on index_col. To read Excel output from 0.16.2 and
prior that had saved index names, use True.
Returns
-------
Expand Down Expand Up @@ -198,8 +194,8 @@ def get_writer(engine_name):
def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
index_col=None, names=None, parse_cols=None, parse_dates=False,
date_parser=None, na_values=None, thousands=None,
convert_float=True, has_index_names=None, converters=None,
dtype=None, true_values=None, false_values=None, engine=None,
convert_float=True, converters=None, dtype=None,
true_values=None, false_values=None, engine=None,
squeeze=False, **kwds):

# Can't use _deprecate_kwarg since sheetname=None has a special meaning
Expand All @@ -218,10 +214,9 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
sheetname=sheet_name, header=header, skiprows=skiprows, names=names,
index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates,
date_parser=date_parser, na_values=na_values, thousands=thousands,
convert_float=convert_float, has_index_names=has_index_names,
skip_footer=skip_footer, converters=converters, dtype=dtype,
true_values=true_values, false_values=false_values, squeeze=squeeze,
**kwds)
convert_float=convert_float, skip_footer=skip_footer,
converters=converters, dtype=dtype, true_values=true_values,
false_values=false_values, squeeze=squeeze, **kwds)


class ExcelFile(object):
Expand Down Expand Up @@ -283,9 +278,8 @@ def __fspath__(self):
def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0,
names=None, index_col=None, parse_cols=None, parse_dates=False,
date_parser=None, na_values=None, thousands=None,
convert_float=True, has_index_names=None,
converters=None, true_values=None, false_values=None,
squeeze=False, **kwds):
convert_float=True, converters=None, true_values=None,
false_values=None, squeeze=False, **kwds):
"""
Parse specified sheet(s) into a DataFrame
Expand All @@ -296,7 +290,6 @@ def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0,
return self._parse_excel(sheetname=sheet_name, header=header,
skiprows=skiprows, names=names,
index_col=index_col,
has_index_names=has_index_names,
parse_cols=parse_cols,
parse_dates=parse_dates,
date_parser=date_parser, na_values=na_values,
Expand Down Expand Up @@ -343,23 +336,17 @@ def _excel2num(x):
return i in parse_cols

def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None,
skip_footer=0, index_col=None, has_index_names=None,
parse_cols=None, parse_dates=False, date_parser=None,
na_values=None, thousands=None, convert_float=True,
true_values=None, false_values=None, verbose=False,
dtype=None, squeeze=False, **kwds):
skip_footer=0, index_col=None, parse_cols=None,
parse_dates=False, date_parser=None, na_values=None,
thousands=None, convert_float=True, true_values=None,
false_values=None, verbose=False, dtype=None,
squeeze=False, **kwds):

skipfooter = kwds.pop('skipfooter', None)
if skipfooter is not None:
skip_footer = skipfooter

_validate_header_arg(header)
if has_index_names is not None:
warn("\nThe has_index_names argument is deprecated; index names "
"will be automatically inferred based on index_col.\n"
"This argmument is still necessary if reading Excel output "
"from 0.16.2 or prior with index names.", FutureWarning,
stacklevel=3)

if 'chunksize' in kwds:
raise NotImplementedError("chunksize keyword of read_excel "
Expand Down Expand Up @@ -511,8 +498,7 @@ def _parse_cell(cell_contents, cell_typ):
else:
last = data[row][col]

if is_list_like(header) and len(header) > 1:
has_index_names = True
has_index_names = is_list_like(header) and len(header) > 1

# GH 12292 : error when read one empty column from excel file
try:
Expand Down
63 changes: 38 additions & 25 deletions pandas/tests/io/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,8 +881,42 @@ def test_excel_multindex_roundtrip(self):
tm.assert_frame_equal(
df, act, check_names=check_names)

def test_excel_oldindex_format(self):
# GH 4679
def test_excel_old_index_format(self):
# see gh-4679
filename = 'test_index_name_pre17' + self.ext
in_file = os.path.join(self.dirpath, filename)

# Index names are now inferred from the header rows, so in the
# "names" versions of the data the row holding the index name(s)
# is read as an ordinary data row whose values are all null.
data = np.array([[None, None, None, None, None],
['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'],
['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'],
['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'],
['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'],
['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']])
columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4']
mi = MultiIndex(levels=[['R0', 'R_l0_g0', 'R_l0_g1',
'R_l0_g2', 'R_l0_g3', 'R_l0_g4'],
['R1', 'R_l1_g0', 'R_l1_g1',
'R_l1_g2', 'R_l1_g3', 'R_l1_g4']],
labels=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]],
names=[None, None])
si = Index(['R0', 'R_l0_g0', 'R_l0_g1', 'R_l0_g2',
'R_l0_g3', 'R_l0_g4'], name=None)

expected = pd.DataFrame(data, index=si, columns=columns)

actual = pd.read_excel(in_file, 'single_names')
tm.assert_frame_equal(actual, expected)

expected.index = mi

actual = pd.read_excel(in_file, 'multi_names')
tm.assert_frame_equal(actual, expected)

# The analogous versions of the "names" version data
# where there are explicitly no names for the indices.
data = np.array([['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'],
['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'],
['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'],
Expand All @@ -894,40 +928,19 @@ def test_excel_oldindex_format(self):
['R_l1_g0', 'R_l1_g1', 'R_l1_g2',
'R_l1_g3', 'R_l1_g4']],
labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]],
names=['R0', 'R1'])
names=[None, None])
si = Index(['R_l0_g0', 'R_l0_g1', 'R_l0_g2',
'R_l0_g3', 'R_l0_g4'], name='R0')

in_file = os.path.join(
self.dirpath, 'test_index_name_pre17' + self.ext)
'R_l0_g3', 'R_l0_g4'], name=None)

expected = pd.DataFrame(data, index=si, columns=columns)
with tm.assert_produces_warning(FutureWarning):
actual = pd.read_excel(
in_file, 'single_names', has_index_names=True)
tm.assert_frame_equal(actual, expected)

expected.index.name = None
actual = pd.read_excel(in_file, 'single_no_names')
tm.assert_frame_equal(actual, expected)
with tm.assert_produces_warning(FutureWarning):
actual = pd.read_excel(
in_file, 'single_no_names', has_index_names=False)
tm.assert_frame_equal(actual, expected)

expected.index = mi
with tm.assert_produces_warning(FutureWarning):
actual = pd.read_excel(
in_file, 'multi_names', has_index_names=True)
tm.assert_frame_equal(actual, expected)

expected.index.names = [None, None]
actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1])
tm.assert_frame_equal(actual, expected, check_names=False)
with tm.assert_produces_warning(FutureWarning):
actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1],
has_index_names=False)
tm.assert_frame_equal(actual, expected, check_names=False)

def test_read_excel_bool_header_arg(self):
# GH 6114
Expand Down

0 comments on commit db419bf

Please sign in to comment.