Skip to content

Commit

Permalink
TST: Decoupled more xlrd reading tests from openpyxl (#27114)
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd authored Jun 30, 2019
1 parent 2811464 commit 65ec968
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 50 deletions.
103 changes: 53 additions & 50 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import pandas.util.testing as tm

from pandas.io.common import URLError
from pandas.io.excel import ExcelFile


@contextlib.contextmanager
Expand Down Expand Up @@ -736,121 +735,125 @@ class TestExcelFileRead:
pytest.param(None, marks=pytest.mark.skipif(
not td.safe_import("xlrd"), reason="no xlrd")),
])
def cd_and_set_engine(self, request, datapath, monkeypatch):
def cd_and_set_engine(self, request, datapath, monkeypatch, read_ext):
"""
Change directory and set engine for ExcelFile objects.
"""
if request.param == 'openpyxl' and read_ext == '.xls':
pytest.skip()

func = partial(pd.ExcelFile, engine=request.param)
monkeypatch.chdir(datapath("io", "data"))
monkeypatch.setattr(pd, 'ExcelFile', func)

def test_excel_passes_na(self, read_ext):

excel = ExcelFile('test4' + read_ext)

parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
na_values=['apple'])
with pd.ExcelFile('test4' + read_ext) as excel:
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
na_values=['apple'])
expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']],
columns=['Test'])
tm.assert_frame_equal(parsed, expected)

parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
na_values=['apple'])
with pd.ExcelFile('test4' + read_ext) as excel:
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
na_values=['apple'])
expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
columns=['Test'])
tm.assert_frame_equal(parsed, expected)

# 13967
excel = ExcelFile('test5' + read_ext)

parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
na_values=['apple'])
with pd.ExcelFile('test5' + read_ext) as excel:
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
na_values=['apple'])
expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']],
columns=['Test'])
tm.assert_frame_equal(parsed, expected)

parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
na_values=['apple'])
with pd.ExcelFile('test5' + read_ext) as excel:
parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
na_values=['apple'])
expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
columns=['Test'])
tm.assert_frame_equal(parsed, expected)

@pytest.mark.parametrize('arg', ['sheet', 'sheetname', 'parse_cols'])
def test_unexpected_kwargs_raises(self, read_ext, arg):
# gh-17964
excel = ExcelFile('test1' + read_ext)

kwarg = {arg: 'Sheet1'}
msg = "unexpected keyword argument `{}`".format(arg)
with pytest.raises(TypeError, match=msg):
pd.read_excel(excel, **kwarg)

def test_excel_table_sheet_by_index(self, read_ext, df_ref):
with pd.ExcelFile('test1' + read_ext) as excel:
with pytest.raises(TypeError, match=msg):
pd.read_excel(excel, **kwarg)

excel = ExcelFile('test1' + read_ext)
def test_excel_table_sheet_by_index(self, read_ext, df_ref):

df1 = pd.read_excel(excel, 0, index_col=0)
df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0)
with pd.ExcelFile('test1' + read_ext) as excel:
df1 = pd.read_excel(excel, 0, index_col=0)
df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0)
tm.assert_frame_equal(df1, df_ref, check_names=False)
tm.assert_frame_equal(df2, df_ref, check_names=False)

df1 = excel.parse(0, index_col=0)
df2 = excel.parse(1, skiprows=[1], index_col=0)
with pd.ExcelFile('test1' + read_ext) as excel:
df1 = excel.parse(0, index_col=0)
df2 = excel.parse(1, skiprows=[1], index_col=0)
tm.assert_frame_equal(df1, df_ref, check_names=False)
tm.assert_frame_equal(df2, df_ref, check_names=False)

df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1)
with pd.ExcelFile('test1' + read_ext) as excel:
df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1)
tm.assert_frame_equal(df3, df1.iloc[:-1])

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1)
with pd.ExcelFile('test1' + read_ext) as excel:
df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1)

tm.assert_frame_equal(df3, df4)

df3 = excel.parse(0, index_col=0, skipfooter=1)
tm.assert_frame_equal(df3, df1.iloc[:-1])
with pd.ExcelFile('test1' + read_ext) as excel:
df3 = excel.parse(0, index_col=0, skipfooter=1)

import xlrd # will move to engine-specific tests as new ones are added
with pytest.raises(xlrd.XLRDError):
pd.read_excel(excel, 'asdf')
tm.assert_frame_equal(df3, df1.iloc[:-1])

def test_sheet_name(self, read_ext, df_ref):
filename = "test1"
sheet_name = "Sheet1"

excel = ExcelFile(filename + read_ext)
df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc
df2_parse = excel.parse(index_col=0,
sheet_name=sheet_name)
with pd.ExcelFile(filename + read_ext) as excel:
df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc

with pd.ExcelFile(filename + read_ext) as excel:
df2_parse = excel.parse(index_col=0,
sheet_name=sheet_name)

tm.assert_frame_equal(df1_parse, df_ref, check_names=False)
tm.assert_frame_equal(df2_parse, df_ref, check_names=False)

def test_excel_read_buffer(self, read_ext):

pth = 'test1' + read_ext
expected = pd.read_excel(pth, 'Sheet1', index_col=0)
engine = pd.ExcelFile.keywords['engine'] # TODO: fixturize
expected = pd.read_excel(pth, 'Sheet1', index_col=0, engine=engine)

with open(pth, 'rb') as f:
xls = ExcelFile(f)
actual = pd.read_excel(xls, 'Sheet1', index_col=0)
with pd.ExcelFile(f) as xls:
actual = pd.read_excel(xls, 'Sheet1', index_col=0)

tm.assert_frame_equal(expected, actual)

def test_reader_closes_file(self, read_ext):

f = open('test1' + read_ext, 'rb')
with ExcelFile(f) as xlsx:
engine = pd.ExcelFile.keywords['engine'] # TODO: fixturize
with pd.ExcelFile(f) as xlsx:
# parses okay
pd.read_excel(xlsx, 'Sheet1', index_col=0)
pd.read_excel(xlsx, 'Sheet1', index_col=0, engine=engine)

assert f.closed

@pytest.mark.parametrize('excel_engine', [
'xlrd',
None
])
def test_read_excel_engine_value(self, read_ext, excel_engine):
def test_conflicting_excel_engines(self, read_ext):
# GH 26566
xl = ExcelFile("test1" + read_ext, engine=excel_engine)
msg = "Engine should not be specified when passing an ExcelFile"
with pytest.raises(ValueError, match=msg):
pd.read_excel(xl, engine='openpyxl')

with pd.ExcelFile("test1" + read_ext) as xl:
with pytest.raises(ValueError, match=msg):
pd.read_excel(xl, engine='foo')
2 changes: 2 additions & 0 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ class and any subclasses, on account of the `autouse=True`
set_option(option_name, prev_engine) # Roll back option change


@td.skip_if_no('xlrd')
@pytest.mark.parametrize("engine,ext", [
pytest.param('openpyxl', '.xlsx', marks=pytest.mark.skipif(
not td.safe_import('openpyxl'), reason='No openpyxl')),
Expand Down Expand Up @@ -1252,6 +1253,7 @@ def check_called(func):
'something.xls', engine='dummy'))


@td.skip_if_no('xlrd')
@td.skip_if_no('openpyxl')
@pytest.mark.skipif(not PY36, reason='requires fspath')
class TestFSPath:
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/io/excel/test_xlrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,11 @@ def test_read_xlrd_book(read_ext, frame):
result = pd.read_excel(book, sheet_name=sheet_name,
engine=engine, index_col=0)
tm.assert_frame_equal(df, result)


# TODO: test for openpyxl as well
def test_excel_table_sheet_by_index(datapath, read_ext):
    """
    Check that asking for sheet 'asdf' of the ``test1`` workbook raises
    ``xlrd.XLRDError``.

    Parameters
    ----------
    datapath : fixture
        Called with path components to locate the workbook under
        ``io/data``.
    read_ext : fixture
        Extension appended to the ``test1`` file name.
    """
    workbook_name = 'test1{}'.format(read_ext)
    workbook_path = datapath("io", "data", workbook_name)

    # NOTE(review): presumably the workbook has no sheet called 'asdf',
    # which is what triggers the error -- confirm against the data file.
    with pd.ExcelFile(workbook_path) as excel, \
            pytest.raises(xlrd.XLRDError):
        pd.read_excel(excel, 'asdf')

0 comments on commit 65ec968

Please sign in to comment.