Skip to content

Commit

Permalink
BUG: Ignore chartsheets (pandas-dev#41698)
Browse files Browse the repository at this point in the history
  • Loading branch information
ahawryluk authored Jul 2, 2021
1 parent 71dd600 commit 7178fbb
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ MultiIndex

I/O
^^^
- Bug in :func:`read_excel` attempting to read chart sheets from ``.xlsx`` files; chart sheets are now ignored (:issue:`41448`)
-
-

Expand Down
7 changes: 4 additions & 3 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,9 @@
or ``StringIO``.
sheet_name : str, int, list, or None, default 0
Strings are used for sheet names. Integers are used in zero-indexed
sheet positions. Lists of strings/integers are used to request
multiple sheets. Specify None to get all sheets.
sheet positions (chart sheets do not count as a sheet position).
Lists of strings/integers are used to request multiple sheets.
Specify None to get all worksheets.
Available cases:
Expand All @@ -92,7 +93,7 @@
* ``"Sheet1"``: Load sheet with name "Sheet1"
* ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5"
as a dict of `DataFrame`
* None: All sheets.
* None: All worksheets.
header : int, list of int, default 0
Row (0-indexed) to use for the column labels of the parsed
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):

# Names of the sheets this reader exposes, taken from the loaded workbook in ``self.book``.
@property
def sheet_names(self) -> list[str]:
# NOTE(review): this is a diff view (hunk header: 1 addition & 1 deletion), so the
# two returns below are presumably the removed line followed by its replacement,
# not two statements of the same body -- TODO confirm against the real file.
return self.book.sheetnames
# Builds the list from the titles of the entries in ``self.book.worksheets``;
# presumably this is what leaves chart sheets out of the result -- verify against openpyxl.
return [sheet.title for sheet in self.book.worksheets]

def get_sheet_by_name(self, name: str):
self.raise_if_bad_sheet_by_name(name)
Expand Down
Binary file added pandas/tests/io/data/excel/chartsheet.xls
Binary file not shown.
Binary file added pandas/tests/io/data/excel/chartsheet.xlsb
Binary file not shown.
Binary file added pandas/tests/io/data/excel/chartsheet.xlsm
Binary file not shown.
Binary file added pandas/tests/io/data/excel/chartsheet.xlsx
Binary file not shown.
41 changes: 41 additions & 0 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,34 @@ def test_trailing_blanks(self, read_ext):
result = pd.read_excel(file_name)
assert result.shape == (3, 3)

def test_ignore_chartsheets_by_str(self, request, read_ext):
    # GH 41448
    # Asking for the sheet named "Chart1" must fail with the
    # "worksheet not found" ValueError instead of being parsed.
    engine = pd.read_excel.keywords["engine"]

    if engine == "odf":
        pytest.skip("chartsheets do not exist in the ODF format")

    if engine == "pyxlsb":
        # Expected failure for this engine; the reason string says why.
        pyxlsb_xfail = pytest.mark.xfail(
            reason="pyxlsb can't distinguish chartsheets from worksheets"
        )
        request.node.add_marker(pyxlsb_xfail)

    expected_msg = "Worksheet named 'Chart1' not found"
    with pytest.raises(ValueError, match=expected_msg):
        pd.read_excel("chartsheet" + read_ext, sheet_name="Chart1")

def test_ignore_chartsheets_by_int(self, request, read_ext):
    # GH 41448
    # Position 1 must be rejected: the expected error reports that only
    # one worksheet was found in the fixture workbook.
    engine = pd.read_excel.keywords["engine"]

    if engine == "odf":
        pytest.skip("chartsheets do not exist in the ODF format")

    if engine == "pyxlsb":
        # Expected failure for this engine; the reason string says why.
        pyxlsb_xfail = pytest.mark.xfail(
            reason="pyxlsb can't distinguish chartsheets from worksheets"
        )
        request.node.add_marker(pyxlsb_xfail)

    expected_msg = "Worksheet index 1 is invalid, 1 worksheets found"
    with pytest.raises(ValueError, match=expected_msg):
        pd.read_excel("chartsheet" + read_ext, sheet_name=1)


class TestExcelFileRead:
@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -1501,6 +1529,19 @@ def test_engine_invalid_option(self, read_ext):
with pd.option_context(f"io.excel{read_ext}.reader", "abc"):
pass

def test_ignore_chartsheets(self, request, engine, read_ext):
    # GH 41448
    # ExcelFile.sheet_names for the fixture workbook must list only "Sheet1".
    if engine == "odf":
        pytest.skip("chartsheets do not exist in the ODF format")

    if engine == "pyxlsb":
        # Expected failure for this engine; the reason string says why.
        pyxlsb_xfail = pytest.mark.xfail(
            reason="pyxlsb can't distinguish chartsheets from worksheets"
        )
        request.node.add_marker(pyxlsb_xfail)

    with pd.ExcelFile("chartsheet" + read_ext) as workbook:
        listed_names = workbook.sheet_names
        assert listed_names == ["Sheet1"]

def test_corrupt_files_closed(self, request, engine, read_ext):
# GH41778
errors = (BadZipFile,)
Expand Down

0 comments on commit 7178fbb

Please sign in to comment.