-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TST: Decoupled more xlrd reading tests from openpyxl #27114
Changes from 1 commit
9994e3c
63a1a9f
66363b4
559f13c
d32ac00
cf7eaf0
cb52c3a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,6 @@ | |
import pandas.util.testing as tm | ||
|
||
from pandas.io.common import URLError | ||
from pandas.io.excel import ExcelFile | ||
|
||
|
||
@contextlib.contextmanager | ||
|
@@ -736,39 +735,44 @@ class TestExcelFileRead: | |
pytest.param(None, marks=pytest.mark.skipif( | ||
not td.safe_import("xlrd"), reason="no xlrd")), | ||
]) | ||
def cd_and_set_engine(self, request, datapath, monkeypatch): | ||
def cd_and_set_engine(self, request, datapath, monkeypatch, read_ext): | ||
""" | ||
Change directory and set engine for ExcelFile objects. | ||
""" | ||
if request.param == 'openpyxl' and read_ext == '.xls': | ||
pytest.skip() | ||
|
||
func = partial(pd.ExcelFile, engine=request.param) | ||
monkeypatch.chdir(datapath("io", "data")) | ||
monkeypatch.setattr(pd, 'ExcelFile', func) | ||
|
||
def test_excel_passes_na(self, read_ext): | ||
|
||
excel = ExcelFile('test4' + read_ext) | ||
excel = pd.ExcelFile('test4' + read_ext) | ||
|
||
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, | ||
na_values=['apple']) | ||
expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']], | ||
columns=['Test']) | ||
tm.assert_frame_equal(parsed, expected) | ||
|
||
excel = pd.ExcelFile('test4' + read_ext) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. openpyxl seems to exhaust a |
||
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, | ||
na_values=['apple']) | ||
expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']], | ||
columns=['Test']) | ||
tm.assert_frame_equal(parsed, expected) | ||
|
||
# 13967 | ||
excel = ExcelFile('test5' + read_ext) | ||
excel = pd.ExcelFile('test5' + read_ext) | ||
|
||
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, | ||
na_values=['apple']) | ||
expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']], | ||
columns=['Test']) | ||
tm.assert_frame_equal(parsed, expected) | ||
|
||
excel = pd.ExcelFile('test5' + read_ext) | ||
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, | ||
na_values=['apple']) | ||
expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']], | ||
|
@@ -778,7 +782,7 @@ def test_excel_passes_na(self, read_ext): | |
@pytest.mark.parametrize('arg', ['sheet', 'sheetname', 'parse_cols']) | ||
def test_unexpected_kwargs_raises(self, read_ext, arg): | ||
# gh-17964 | ||
excel = ExcelFile('test1' + read_ext) | ||
excel = pd.ExcelFile('test1' + read_ext) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we normally need to close the file? use in a context manager? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like it depends on what type of object ExcelFile is interacting with but ultimately converted usage to a context manager to be safe |
||
|
||
kwarg = {arg: 'Sheet1'} | ||
msg = "unexpected keyword argument `{}`".format(arg) | ||
|
@@ -787,70 +791,67 @@ def test_unexpected_kwargs_raises(self, read_ext, arg): | |
|
||
def test_excel_table_sheet_by_index(self, read_ext, df_ref): | ||
|
||
excel = ExcelFile('test1' + read_ext) | ||
excel = pd.ExcelFile('test1' + read_ext) | ||
|
||
df1 = pd.read_excel(excel, 0, index_col=0) | ||
df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0) | ||
tm.assert_frame_equal(df1, df_ref, check_names=False) | ||
tm.assert_frame_equal(df2, df_ref, check_names=False) | ||
|
||
excel = pd.ExcelFile('test1' + read_ext) | ||
df1 = excel.parse(0, index_col=0) | ||
df2 = excel.parse(1, skiprows=[1], index_col=0) | ||
tm.assert_frame_equal(df1, df_ref, check_names=False) | ||
tm.assert_frame_equal(df2, df_ref, check_names=False) | ||
|
||
excel = pd.ExcelFile('test1' + read_ext) | ||
df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1) | ||
tm.assert_frame_equal(df3, df1.iloc[:-1]) | ||
|
||
excel = pd.ExcelFile('test1' + read_ext) | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1) | ||
tm.assert_frame_equal(df3, df4) | ||
|
||
excel = pd.ExcelFile('test1' + read_ext) | ||
df3 = excel.parse(0, index_col=0, skipfooter=1) | ||
tm.assert_frame_equal(df3, df1.iloc[:-1]) | ||
|
||
import xlrd # will move to engine-specific tests as new ones are added | ||
with pytest.raises(xlrd.XLRDError): | ||
pd.read_excel(excel, 'asdf') | ||
|
||
def test_sheet_name(self, read_ext, df_ref): | ||
filename = "test1" | ||
sheet_name = "Sheet1" | ||
|
||
excel = ExcelFile(filename + read_ext) | ||
excel = pd.ExcelFile(filename + read_ext) | ||
df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc | ||
excel = pd.ExcelFile(filename + read_ext) | ||
df2_parse = excel.parse(index_col=0, | ||
sheet_name=sheet_name) | ||
|
||
tm.assert_frame_equal(df1_parse, df_ref, check_names=False) | ||
tm.assert_frame_equal(df2_parse, df_ref, check_names=False) | ||
|
||
def test_excel_read_buffer(self, read_ext): | ||
|
||
pth = 'test1' + read_ext | ||
expected = pd.read_excel(pth, 'Sheet1', index_col=0) | ||
engine = pd.ExcelFile.keywords['engine'] # TODO: fixturize | ||
expected = pd.read_excel(pth, 'Sheet1', index_col=0, engine=engine) | ||
|
||
with open(pth, 'rb') as f: | ||
xls = ExcelFile(f) | ||
xls = pd.ExcelFile(f) | ||
actual = pd.read_excel(xls, 'Sheet1', index_col=0) | ||
tm.assert_frame_equal(expected, actual) | ||
|
||
def test_reader_closes_file(self, read_ext): | ||
|
||
f = open('test1' + read_ext, 'rb') | ||
with ExcelFile(f) as xlsx: | ||
# parses okay | ||
pd.read_excel(xlsx, 'Sheet1', index_col=0) | ||
|
||
assert f.closed | ||
|
||
@pytest.mark.parametrize('excel_engine', [ | ||
'xlrd', | ||
None | ||
]) | ||
def test_read_excel_engine_value(self, read_ext, excel_engine): | ||
# GH 26566 | ||
xl = ExcelFile("test1" + read_ext, engine=excel_engine) | ||
msg = "Engine should not be specified when passing an ExcelFile" | ||
with pytest.raises(ValueError, match=msg): | ||
pd.read_excel(xl, engine='openpyxl') | ||
@td.skip_if_no("openpyxl") | ||
@pytest.mark.parametrize('excel_engine', [ | ||
pytest.param('xlrd', marks=pytest.mark.skipif( | ||
not td.safe_import("xlrd"), reason="no xlrd")), | ||
pytest.param(None, marks=pytest.mark.skipif( | ||
not td.safe_import("xlrd"), reason="no xlrd")), | ||
]) | ||
def test_conflicting_excel_engines(read_ext, excel_engine, datapath): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This didn't really follow the same rules for parametrization as the rest of the items in |
||
# GH 26566 | ||
path = datapath("io", "data", 'test1{}'.format(read_ext)) | ||
xl = pd.ExcelFile(path, engine=excel_engine) | ||
msg = "Engine should not be specified when passing an ExcelFile" | ||
with pytest.raises(ValueError, match=msg): | ||
pd.read_excel(xl, engine='openpyxl') |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We were monkeypatching
pd.ExcelFile
but usedExcelFile
imported directly in tests which was causing this to run exclusive of the specified engine. Figured easiest just to stick with the pd namespace