From 2a066c09f7462ef1e9e362c96aa8d8ed43f72326 Mon Sep 17 00:00:00 2001 From: Graham Jeffries Date: Sat, 19 Dec 2015 11:17:49 -0500 Subject: [PATCH] raise NotImplemented for date parsing args in read_excel --- doc/source/whatsnew/v0.18.0.txt | 4 ++ pandas/io/excel.py | 9 ++++- pandas/io/tests/test_excel.py | 70 +++++++++++++++++++-------------- 3 files changed, 52 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 21e3e86e07f37..c8a83029bed9c 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -340,4 +340,8 @@ Bug Fixes - Bug in ``Index`` prevents copying name of passed ``Index``, when a new name is not provided (:issue:`11193`) - Bug in ``read_excel`` failing to read any non-empty sheets when empty sheets exist and ``sheetname=None`` (:issue:`11711`) + +- Bug in ``read_excel`` failing to raise ``NotImplementedError`` when keyword ``parse_dates`` or ``date_parser`` is provided (:issue:`11544`) + - Bug in ``read_sql`` with pymysql connections failing to return chunked data (:issue:`11522`) + diff --git a/pandas/io/excel.py b/pandas/io/excel.py index dec7da513fb42..2b9ba56447dee 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -293,7 +293,14 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0, stacklevel=3) if 'chunksize' in kwds: - raise NotImplementedError("Reading an Excel file in chunks " + raise NotImplementedError("chunksize keyword of read_excel " + "is not implemented") + if parse_dates: + raise NotImplementedError("parse_dates keyword of read_excel " + "is not implemented") + + if date_parser is not None: + raise NotImplementedError("date_parser keyword of read_excel " "is not implemented") import xlrd diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index cc9f8c564ebf5..8023c25cdd660 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -167,10 +167,9 @@ def 
test_parse_cols_int(self): dfref = self.get_csv_refdf('test1') dfref = dfref.reindex(columns=['A', 'B', 'C']) - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, - parse_cols=3) + df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols=3) df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_dates=True, parse_cols=3) + parse_cols=3) # TODO add index to xls file) tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) @@ -179,10 +178,9 @@ def test_parse_cols_list(self): dfref = self.get_csv_refdf('test1') dfref = dfref.reindex(columns=['B', 'C']) - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, + df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols=[0, 2, 3]) df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_dates=True, parse_cols=[0, 2, 3]) # TODO add index to xls file) tm.assert_frame_equal(df1, dfref, check_names=False) @@ -193,28 +191,28 @@ def test_parse_cols_str(self): dfref = self.get_csv_refdf('test1') df1 = dfref.reindex(columns=['A', 'B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, + df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols='A:D') df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_dates=True, parse_cols='A:D') + parse_cols='A:D') # TODO add index to xls, read xls ignores index name ? 
tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, + df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols='A,C,D') df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_dates=True, parse_cols='A,C,D') + parse_cols='A,C,D') # TODO add index to xls file tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, + df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols='A,C:D') df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_dates=True, parse_cols='A,C:D') + parse_cols='A,C:D') tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) @@ -251,23 +249,23 @@ def test_excel_table_sheet_by_index(self): excel = self.get_excelfile('test1') dfref = self.get_csv_refdf('test1') - df1 = read_excel(excel, 0, index_col=0, parse_dates=True) - df2 = read_excel(excel, 1, skiprows=[1], index_col=0, parse_dates=True) + df1 = read_excel(excel, 0, index_col=0) + df2 = read_excel(excel, 1, skiprows=[1], index_col=0) tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) - df1 = excel.parse(0, index_col=0, parse_dates=True) - df2 = excel.parse(1, skiprows=[1], index_col=0, parse_dates=True) + df1 = excel.parse(0, index_col=0) + df2 = excel.parse(1, skiprows=[1], index_col=0) tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) - df3 = read_excel(excel, 0, index_col=0, parse_dates=True, skipfooter=1) - df4 = read_excel(excel, 0, index_col=0, parse_dates=True, skip_footer=1) + df3 = read_excel(excel, 0, index_col=0, skipfooter=1) + df4 = 
read_excel(excel, 0, index_col=0, skip_footer=1) tm.assert_frame_equal(df3, df1.ix[:-1]) tm.assert_frame_equal(df3, df4) - df3 = excel.parse(0, index_col=0, parse_dates=True, skipfooter=1) - df4 = excel.parse(0, index_col=0, parse_dates=True, skip_footer=1) + df3 = excel.parse(0, index_col=0, skipfooter=1) + df4 = excel.parse(0, index_col=0, skip_footer=1) tm.assert_frame_equal(df3, df1.ix[:-1]) tm.assert_frame_equal(df3, df4) @@ -279,16 +277,15 @@ def test_excel_table(self): dfref = self.get_csv_refdf('test1') - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_dates=True) + df1 = self.get_exceldf('test1', 'Sheet1', index_col=0) + df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0) # TODO add index to file tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) - df3 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, + df3 = self.get_exceldf('test1', 'Sheet1', index_col=0, skipfooter=1) - df4 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, + df4 = self.get_exceldf('test1', 'Sheet1', index_col=0, skip_footer=1) tm.assert_frame_equal(df3, df1.ix[:-1]) tm.assert_frame_equal(df3, df4) @@ -389,7 +386,7 @@ def test_reading_all_sheets_with_blank(self): basename = 'blank_with_header' dfs = self.get_exceldf(basename, sheetname=None) expected_keys = ['Sheet1', 'Sheet2', 'Sheet3'] - tm.assert_contains_all(expected_keys, dfs.keys()) + tm.assert_contains_all(expected_keys, dfs.keys()) # GH6403 def test_read_excel_blank(self): @@ -411,14 +408,14 @@ class XlrdTests(ReadingTestsBase): def test_excel_read_buffer(self): pth = os.path.join(self.dirpath, 'test1' + self.ext) - expected = read_excel(pth, 'Sheet1', index_col=0, parse_dates=True) + expected = read_excel(pth, 'Sheet1', index_col=0) with open(pth, 'rb') as f: - actual = read_excel(f, 'Sheet1', index_col=0, 
parse_dates=True) + actual = read_excel(f, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) with open(pth, 'rb') as f: xls = ExcelFile(f) - actual = read_excel(xls, 'Sheet1', index_col=0, parse_dates=True) + actual = read_excel(xls, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) def test_read_xlrd_Book(self): @@ -680,7 +677,7 @@ def test_excel_oldindex_format(self): tm.assert_frame_equal(actual, expected, check_names=False) def test_read_excel_bool_header_arg(self): - #GH 6114 + # GH 6114 for arg in [True, False]: with tm.assertRaises(TypeError): pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), @@ -692,6 +689,19 @@ def test_read_excel_chunksize(self): pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), chunksize=100) + def test_read_excel_parse_dates(self): + # GH 11544 + with tm.assertRaises(NotImplementedError): + pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + parse_dates=True) + + def test_read_excel_date_parser(self): + # GH 11544 + with tm.assertRaises(NotImplementedError): + dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S') + pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + date_parser=dateparse) + def test_read_excel_skiprows_list(self): #GH 4903 actual = pd.read_excel(os.path.join(self.dirpath, 'testskiprows' + self.ext), @@ -1093,7 +1103,7 @@ def test_to_excel_periodindex(self): xp.to_excel(path, 'sht1') reader = ExcelFile(path) - rs = read_excel(reader, 'sht1', index_col=0, parse_dates=True) + rs = read_excel(reader, 'sht1', index_col=0) tm.assert_frame_equal(xp, rs.to_period('M')) def test_to_excel_multiindex(self):