Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: Decoupled more xlrd reading tests from openpyxl #27114

Merged
merged 7 commits into from
Jun 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 53 additions & 50 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import pandas.util.testing as tm

from pandas.io.common import URLError
from pandas.io.excel import ExcelFile


@contextlib.contextmanager
Expand Down Expand Up @@ -736,121 +735,125 @@ class TestExcelFileRead:
pytest.param(None, marks=pytest.mark.skipif(
not td.safe_import("xlrd"), reason="no xlrd")),
])
def cd_and_set_engine(self, request, datapath, monkeypatch):
def cd_and_set_engine(self, request, datapath, monkeypatch, read_ext):
"""
Change directory and set engine for ExcelFile objects.
"""
if request.param == 'openpyxl' and read_ext == '.xls':
pytest.skip()

func = partial(pd.ExcelFile, engine=request.param)
monkeypatch.chdir(datapath("io", "data"))
monkeypatch.setattr(pd, 'ExcelFile', func)

def test_excel_passes_na(self, read_ext):

excel = ExcelFile('test4' + read_ext)

parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
na_values=['apple'])
with pd.ExcelFile('test4' + read_ext) as excel:
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
na_values=['apple'])
expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']],
columns=['Test'])
tm.assert_frame_equal(parsed, expected)

parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
na_values=['apple'])
with pd.ExcelFile('test4' + read_ext) as excel:
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
na_values=['apple'])
expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
columns=['Test'])
tm.assert_frame_equal(parsed, expected)

# 13967
excel = ExcelFile('test5' + read_ext)

parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
na_values=['apple'])
with pd.ExcelFile('test5' + read_ext) as excel:
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
na_values=['apple'])
expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']],
columns=['Test'])
tm.assert_frame_equal(parsed, expected)

parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
na_values=['apple'])
with pd.ExcelFile('test5' + read_ext) as excel:
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
na_values=['apple'])
expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
columns=['Test'])
tm.assert_frame_equal(parsed, expected)

@pytest.mark.parametrize('arg', ['sheet', 'sheetname', 'parse_cols'])
def test_unexpected_kwargs_raises(self, read_ext, arg):
# gh-17964
excel = ExcelFile('test1' + read_ext)

kwarg = {arg: 'Sheet1'}
msg = "unexpected keyword argument `{}`".format(arg)
with pytest.raises(TypeError, match=msg):
pd.read_excel(excel, **kwarg)

def test_excel_table_sheet_by_index(self, read_ext, df_ref):
with pd.ExcelFile('test1' + read_ext) as excel:
with pytest.raises(TypeError, match=msg):
pd.read_excel(excel, **kwarg)

excel = ExcelFile('test1' + read_ext)
def test_excel_table_sheet_by_index(self, read_ext, df_ref):

df1 = pd.read_excel(excel, 0, index_col=0)
df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0)
with pd.ExcelFile('test1' + read_ext) as excel:
df1 = pd.read_excel(excel, 0, index_col=0)
df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0)
tm.assert_frame_equal(df1, df_ref, check_names=False)
tm.assert_frame_equal(df2, df_ref, check_names=False)

df1 = excel.parse(0, index_col=0)
df2 = excel.parse(1, skiprows=[1], index_col=0)
with pd.ExcelFile('test1' + read_ext) as excel:
df1 = excel.parse(0, index_col=0)
df2 = excel.parse(1, skiprows=[1], index_col=0)
tm.assert_frame_equal(df1, df_ref, check_names=False)
tm.assert_frame_equal(df2, df_ref, check_names=False)

df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1)
with pd.ExcelFile('test1' + read_ext) as excel:
df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1)
tm.assert_frame_equal(df3, df1.iloc[:-1])

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1)
with pd.ExcelFile('test1' + read_ext) as excel:
df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1)

tm.assert_frame_equal(df3, df4)

df3 = excel.parse(0, index_col=0, skipfooter=1)
tm.assert_frame_equal(df3, df1.iloc[:-1])
with pd.ExcelFile('test1' + read_ext) as excel:
df3 = excel.parse(0, index_col=0, skipfooter=1)

import xlrd # will move to engine-specific tests as new ones are added
with pytest.raises(xlrd.XLRDError):
pd.read_excel(excel, 'asdf')
tm.assert_frame_equal(df3, df1.iloc[:-1])

def test_sheet_name(self, read_ext, df_ref):
filename = "test1"
sheet_name = "Sheet1"

excel = ExcelFile(filename + read_ext)
df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc
df2_parse = excel.parse(index_col=0,
sheet_name=sheet_name)
with pd.ExcelFile(filename + read_ext) as excel:
df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc

with pd.ExcelFile(filename + read_ext) as excel:
df2_parse = excel.parse(index_col=0,
sheet_name=sheet_name)

tm.assert_frame_equal(df1_parse, df_ref, check_names=False)
tm.assert_frame_equal(df2_parse, df_ref, check_names=False)

def test_excel_read_buffer(self, read_ext):

pth = 'test1' + read_ext
expected = pd.read_excel(pth, 'Sheet1', index_col=0)
engine = pd.ExcelFile.keywords['engine'] # TODO: fixturize
expected = pd.read_excel(pth, 'Sheet1', index_col=0, engine=engine)

with open(pth, 'rb') as f:
xls = ExcelFile(f)
actual = pd.read_excel(xls, 'Sheet1', index_col=0)
with pd.ExcelFile(f) as xls:
actual = pd.read_excel(xls, 'Sheet1', index_col=0)

tm.assert_frame_equal(expected, actual)

def test_reader_closes_file(self, read_ext):

f = open('test1' + read_ext, 'rb')
with ExcelFile(f) as xlsx:
engine = pd.ExcelFile.keywords['engine'] # TODO: fixturize
with pd.ExcelFile(f) as xlsx:
# parses okay
pd.read_excel(xlsx, 'Sheet1', index_col=0)
pd.read_excel(xlsx, 'Sheet1', index_col=0, engine=engine)

assert f.closed

@pytest.mark.parametrize('excel_engine', [
'xlrd',
None
])
def test_read_excel_engine_value(self, read_ext, excel_engine):
def test_conflicting_excel_engines(self, read_ext):
# GH 26566
xl = ExcelFile("test1" + read_ext, engine=excel_engine)
msg = "Engine should not be specified when passing an ExcelFile"
with pytest.raises(ValueError, match=msg):
pd.read_excel(xl, engine='openpyxl')

with pd.ExcelFile("test1" + read_ext) as xl:
with pytest.raises(ValueError, match=msg):
pd.read_excel(xl, engine='foo')
2 changes: 2 additions & 0 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ class and any subclasses, on account of the `autouse=True`
set_option(option_name, prev_engine) # Roll back option change


@td.skip_if_no('xlrd')
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The writer tests still assume that xlrd is installed. I will fix this in a follow-up that parametrizes them properly (I have focused more on the readers, given recent PRs).

@simonjayhawkins

@pytest.mark.parametrize("engine,ext", [
pytest.param('openpyxl', '.xlsx', marks=pytest.mark.skipif(
not td.safe_import('openpyxl'), reason='No openpyxl')),
Expand Down Expand Up @@ -1237,6 +1238,7 @@ def check_called(func):
'something.xls', engine='dummy'))


@td.skip_if_no('xlrd')
@td.skip_if_no('openpyxl')
@pytest.mark.skipif(not PY36, reason='requires fspath')
class TestFSPath:
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/io/excel/test_xlrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,11 @@ def test_read_xlrd_book(read_ext, frame):
result = pd.read_excel(book, sheet_name=sheet_name,
engine=engine, index_col=0)
tm.assert_frame_equal(df, result)


# TODO: test for openpyxl as well
def test_excel_table_sheet_by_index(datapath, read_ext):
    """
    Check that reading the sheet named 'asdf' raises ``xlrd.XLRDError``.

    The workbook is the ``test1`` file (with extension ``read_ext``)
    resolved through the ``datapath`` fixture under ``io/data``.
    NOTE(review): presumably no sheet called 'asdf' exists in that
    workbook -- confirm against the data file.
    """
    workbook_name = 'test1{}'.format(read_ext)
    workbook_path = datapath("io", "data", workbook_name)

    # The workbook is opened first; only the read itself is expected to fail.
    with pd.ExcelFile(workbook_path) as workbook, \
            pytest.raises(xlrd.XLRDError):
        pd.read_excel(workbook, 'asdf')