Skip to content

Commit

Permalink
Merge pull request #11870 from grahamjeffries/bugfix-11544
Browse files Browse the repository at this point in the history
raise NotImplemented for date parsing args in read_excel #11544
  • Loading branch information
jreback committed Dec 19, 2015
2 parents b56b1e4 + 2a066c0 commit a050a33
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 31 deletions.
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -340,4 +340,8 @@ Bug Fixes
- Bug in ``Index`` prevents copying name of passed ``Index``, when a new name is not provided (:issue:`11193`)

- Bug in ``read_excel`` failing to read any non-empty sheets when empty sheets exist and ``sheetname=None`` (:issue:`11711`)

- Bug in ``read_excel`` failing to raise ``NotImplementedError`` when either of the keywords ``parse_dates`` or ``date_parser`` is provided (:issue:`11544`)

- Bug in ``read_sql`` with pymysql connections failing to return chunked data (:issue:`11522`)

9 changes: 8 additions & 1 deletion pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,14 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
stacklevel=3)

if 'chunksize' in kwds:
raise NotImplementedError("Reading an Excel file in chunks "
raise NotImplementedError("chunksize keyword of read_excel "
"is not implemented")
if parse_dates:
raise NotImplementedError("parse_dates keyword of read_excel "
"is not implemented")

if date_parser is not None:
raise NotImplementedError("date_parser keyword of read_excel "
"is not implemented")

import xlrd
Expand Down
70 changes: 40 additions & 30 deletions pandas/io/tests/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,9 @@ def test_parse_cols_int(self):

dfref = self.get_csv_refdf('test1')
dfref = dfref.reindex(columns=['A', 'B', 'C'])
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True,
parse_cols=3)
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols=3)
df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
parse_dates=True, parse_cols=3)
parse_cols=3)
# TODO add index to xls file)
tm.assert_frame_equal(df1, dfref, check_names=False)
tm.assert_frame_equal(df2, dfref, check_names=False)
Expand All @@ -179,10 +178,9 @@ def test_parse_cols_list(self):

dfref = self.get_csv_refdf('test1')
dfref = dfref.reindex(columns=['B', 'C'])
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True,
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0,
parse_cols=[0, 2, 3])
df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
parse_dates=True,
parse_cols=[0, 2, 3])
# TODO add index to xls file)
tm.assert_frame_equal(df1, dfref, check_names=False)
Expand All @@ -193,28 +191,28 @@ def test_parse_cols_str(self):
dfref = self.get_csv_refdf('test1')

df1 = dfref.reindex(columns=['A', 'B', 'C'])
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True,
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
parse_cols='A:D')
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
parse_dates=True, parse_cols='A:D')
parse_cols='A:D')
# TODO add index to xls, read xls ignores index name ?
tm.assert_frame_equal(df2, df1, check_names=False)
tm.assert_frame_equal(df3, df1, check_names=False)

df1 = dfref.reindex(columns=['B', 'C'])
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True,
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
parse_cols='A,C,D')
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
parse_dates=True, parse_cols='A,C,D')
parse_cols='A,C,D')
# TODO add index to xls file
tm.assert_frame_equal(df2, df1, check_names=False)
tm.assert_frame_equal(df3, df1, check_names=False)

df1 = dfref.reindex(columns=['B', 'C'])
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True,
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
parse_cols='A,C:D')
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
parse_dates=True, parse_cols='A,C:D')
parse_cols='A,C:D')
tm.assert_frame_equal(df2, df1, check_names=False)
tm.assert_frame_equal(df3, df1, check_names=False)

Expand Down Expand Up @@ -251,23 +249,23 @@ def test_excel_table_sheet_by_index(self):
excel = self.get_excelfile('test1')
dfref = self.get_csv_refdf('test1')

df1 = read_excel(excel, 0, index_col=0, parse_dates=True)
df2 = read_excel(excel, 1, skiprows=[1], index_col=0, parse_dates=True)
df1 = read_excel(excel, 0, index_col=0)
df2 = read_excel(excel, 1, skiprows=[1], index_col=0)
tm.assert_frame_equal(df1, dfref, check_names=False)
tm.assert_frame_equal(df2, dfref, check_names=False)

df1 = excel.parse(0, index_col=0, parse_dates=True)
df2 = excel.parse(1, skiprows=[1], index_col=0, parse_dates=True)
df1 = excel.parse(0, index_col=0)
df2 = excel.parse(1, skiprows=[1], index_col=0)
tm.assert_frame_equal(df1, dfref, check_names=False)
tm.assert_frame_equal(df2, dfref, check_names=False)

df3 = read_excel(excel, 0, index_col=0, parse_dates=True, skipfooter=1)
df4 = read_excel(excel, 0, index_col=0, parse_dates=True, skip_footer=1)
df3 = read_excel(excel, 0, index_col=0, skipfooter=1)
df4 = read_excel(excel, 0, index_col=0, skip_footer=1)
tm.assert_frame_equal(df3, df1.ix[:-1])
tm.assert_frame_equal(df3, df4)

df3 = excel.parse(0, index_col=0, parse_dates=True, skipfooter=1)
df4 = excel.parse(0, index_col=0, parse_dates=True, skip_footer=1)
df3 = excel.parse(0, index_col=0, skipfooter=1)
df4 = excel.parse(0, index_col=0, skip_footer=1)
tm.assert_frame_equal(df3, df1.ix[:-1])
tm.assert_frame_equal(df3, df4)

Expand All @@ -279,16 +277,15 @@ def test_excel_table(self):

dfref = self.get_csv_refdf('test1')

df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True)
df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
parse_dates=True)
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0)
df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0)
# TODO add index to file
tm.assert_frame_equal(df1, dfref, check_names=False)
tm.assert_frame_equal(df2, dfref, check_names=False)

df3 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True,
df3 = self.get_exceldf('test1', 'Sheet1', index_col=0,
skipfooter=1)
df4 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True,
df4 = self.get_exceldf('test1', 'Sheet1', index_col=0,
skip_footer=1)
tm.assert_frame_equal(df3, df1.ix[:-1])
tm.assert_frame_equal(df3, df4)
Expand Down Expand Up @@ -389,7 +386,7 @@ def test_reading_all_sheets_with_blank(self):
basename = 'blank_with_header'
dfs = self.get_exceldf(basename, sheetname=None)
expected_keys = ['Sheet1', 'Sheet2', 'Sheet3']
tm.assert_contains_all(expected_keys, dfs.keys())
tm.assert_contains_all(expected_keys, dfs.keys())

# GH6403
def test_read_excel_blank(self):
Expand All @@ -411,14 +408,14 @@ class XlrdTests(ReadingTestsBase):
def test_excel_read_buffer(self):

pth = os.path.join(self.dirpath, 'test1' + self.ext)
expected = read_excel(pth, 'Sheet1', index_col=0, parse_dates=True)
expected = read_excel(pth, 'Sheet1', index_col=0)
with open(pth, 'rb') as f:
actual = read_excel(f, 'Sheet1', index_col=0, parse_dates=True)
actual = read_excel(f, 'Sheet1', index_col=0)
tm.assert_frame_equal(expected, actual)

with open(pth, 'rb') as f:
xls = ExcelFile(f)
actual = read_excel(xls, 'Sheet1', index_col=0, parse_dates=True)
actual = read_excel(xls, 'Sheet1', index_col=0)
tm.assert_frame_equal(expected, actual)

def test_read_xlrd_Book(self):
Expand Down Expand Up @@ -680,7 +677,7 @@ def test_excel_oldindex_format(self):
tm.assert_frame_equal(actual, expected, check_names=False)

def test_read_excel_bool_header_arg(self):
#GH 6114
# GH 6114
for arg in [True, False]:
with tm.assertRaises(TypeError):
pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext),
Expand All @@ -692,6 +689,19 @@ def test_read_excel_chunksize(self):
pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext),
chunksize=100)

def test_read_excel_parse_dates(self):
    # GH 11544
    # Passing ``parse_dates`` to read_excel is expected to raise
    # NotImplementedError.
    excel_path = os.path.join(self.dirpath, 'test1' + self.ext)
    with tm.assertRaises(NotImplementedError):
        pd.read_excel(excel_path, parse_dates=True)

def test_read_excel_date_parser(self):
    # GH 11544
    # Passing ``date_parser`` to read_excel is expected to raise
    # NotImplementedError.

    # A named function instead of a lambda bound to a name (PEP 8).
    def dateparse(value):
        return pd.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')

    excel_path = os.path.join(self.dirpath, 'test1' + self.ext)

    # Only the call under test sits inside the context manager, so an
    # unrelated failure while preparing the arguments cannot be mistaken
    # for the expected error.
    with tm.assertRaises(NotImplementedError):
        pd.read_excel(excel_path, date_parser=dateparse)

def test_read_excel_skiprows_list(self):
#GH 4903
actual = pd.read_excel(os.path.join(self.dirpath, 'testskiprows' + self.ext),
Expand Down Expand Up @@ -1093,7 +1103,7 @@ def test_to_excel_periodindex(self):
xp.to_excel(path, 'sht1')

reader = ExcelFile(path)
rs = read_excel(reader, 'sht1', index_col=0, parse_dates=True)
rs = read_excel(reader, 'sht1', index_col=0)
tm.assert_frame_equal(xp, rs.to_period('M'))

def test_to_excel_multiindex(self):
Expand Down

0 comments on commit a050a33

Please sign in to comment.