diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 45c280b89ea28..24f307f23f435 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -173,6 +173,7 @@ MultiIndex I/O ^^^ +- Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`) - - diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 719a4472fb9e3..4d6a766ad6cfa 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -82,8 +82,9 @@ or ``StringIO``. sheet_name : str, int, list, or None, default 0 Strings are used for sheet names. Integers are used in zero-indexed - sheet positions. Lists of strings/integers are used to request - multiple sheets. Specify None to get all sheets. + sheet positions (chart sheets do not count as a sheet position). + Lists of strings/integers are used to request multiple sheets. + Specify None to get all worksheets. Available cases: @@ -92,7 +93,7 @@ * ``"Sheet1"``: Load sheet with name "Sheet1" * ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5" as a dict of `DataFrame` - * None: All sheets. + * None: All worksheets. header : int, list of int, default 0 Row (0-indexed) to use for the column labels of the parsed diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 03c46f139eeca..d499f1a5ea89f 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -536,7 +536,7 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): @property def sheet_names(self) -> list[str]: - return self.book.sheetnames + return [sheet.title for sheet in self.book.worksheets] def get_sheet_by_name(self, name: str): self.raise_if_bad_sheet_by_name(name) diff --git a/pandas/tests/io/data/excel/chartsheet.xls b/pandas/tests/io/data/excel/chartsheet.xls new file mode 100644 index 0000000000000..7d027400fbd52 Binary files /dev/null and b/pandas/tests/io/data/excel/chartsheet.xls differ diff --git a/pandas/tests/io/data/excel/chartsheet.xlsb b/pandas/tests/io/data/excel/chartsheet.xlsb new file mode 100644 index 0000000000000..805087280f851 Binary files /dev/null and b/pandas/tests/io/data/excel/chartsheet.xlsb differ diff --git a/pandas/tests/io/data/excel/chartsheet.xlsm b/pandas/tests/io/data/excel/chartsheet.xlsm new file mode 100644 index 0000000000000..aadb48d6f4824 Binary files /dev/null and b/pandas/tests/io/data/excel/chartsheet.xlsm differ diff --git a/pandas/tests/io/data/excel/chartsheet.xlsx b/pandas/tests/io/data/excel/chartsheet.xlsx new file mode 100644 index 0000000000000..c8d5e7afb3d07 Binary files /dev/null and b/pandas/tests/io/data/excel/chartsheet.xlsx differ diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index d40fb3ce4a135..cbd241ceda0b1 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1250,6 +1250,34 @@ def test_trailing_blanks(self, read_ext): result = pd.read_excel(file_name) assert result.shape == (3, 3) + def test_ignore_chartsheets_by_str(self, request, read_ext): + # GH 41448 + if pd.read_excel.keywords["engine"] == "odf": + pytest.skip("chartsheets do not exist in the ODF format") + if pd.read_excel.keywords["engine"] == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="pyxlsb can't distinguish chartsheets from worksheets" + ) + ) + with pytest.raises(ValueError, match="Worksheet named 'Chart1' not found"): + pd.read_excel("chartsheet" + read_ext, sheet_name="Chart1") + + def test_ignore_chartsheets_by_int(self, request, read_ext): + # GH 41448 + if pd.read_excel.keywords["engine"] == "odf": + pytest.skip("chartsheets do not exist in the ODF format") + if pd.read_excel.keywords["engine"] == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="pyxlsb can't distinguish chartsheets from worksheets" + ) + ) + with pytest.raises( + ValueError, match="Worksheet index 1 is invalid, 1 worksheets found" + ): + pd.read_excel("chartsheet" + read_ext, sheet_name=1) + class TestExcelFileRead: @pytest.fixture(autouse=True) @@ -1501,6 +1529,19 @@ def test_engine_invalid_option(self, read_ext): with pd.option_context(f"io.excel{read_ext}.reader", "abc"): pass + def test_ignore_chartsheets(self, request, engine, read_ext): + # GH 41448 + if engine == "odf": + pytest.skip("chartsheets do not exist in the ODF format") + if engine == "pyxlsb": + request.node.add_marker( + pytest.mark.xfail( + reason="pyxlsb can't distinguish chartsheets from worksheets" + ) + ) + with pd.ExcelFile("chartsheet" + read_ext) as excel: + assert excel.sheet_names == ["Sheet1"] + def test_corrupt_files_closed(self, request, engine, read_ext): # GH41778 errors = (BadZipFile,)