diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1fd0257d93f45..1fe808e098860 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -159,6 +159,7 @@ Other enhancements - Added support for reading SPSS .sav files using :func:`read_spss` (:issue:`26537`) - Added new option ``plotting.backend`` to be able to select a plotting backend different than the existing ``matplotlib`` one. Use ``pandas.set_option('plotting.backend', '')`` where `` None: + """Reader using openpyxl engine. + + Parameters + ---------- + filepath_or_buffer : string, path object or Workbook + Object to be parsed. + """ + import_optional_dependency("openpyxl") + super().__init__(filepath_or_buffer) + + @property + def _workbook_class(self): + from openpyxl import Workbook + return Workbook + + def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): + from openpyxl import load_workbook + return load_workbook(filepath_or_buffer, + read_only=True, data_only=True) + + @property + def sheet_names(self) -> List[str]: + return self.book.sheetnames + + def get_sheet_by_name(self, name: str): + return self.book[name] + + def get_sheet_by_index(self, index: int): + return self.book.worksheets[index] + + def _convert_cell(self, cell, convert_float: bool) -> Scalar: + + # TODO: replace with openpyxl constants + if cell.is_date: + return cell.value + elif cell.data_type == 'e': + return np.nan + elif cell.data_type == 'b': + return bool(cell.value) + elif cell.value is None: + return '' # compat with xlrd + elif cell.data_type == 'n': + # GH5394 + if convert_float: + val = int(cell.value) + if val == cell.value: + return val + else: + return float(cell.value) + + return cell.value + + def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: + data = [] # type: List[List[Scalar]] + for row in sheet.rows: + data.append( + [self._convert_cell(cell, convert_float) for cell in row]) + + return data diff --git a/pandas/tests/io/data/test1.xlsm b/pandas/tests/io/data/test1.xlsm index f93c57ab7f857..28f4f27e4e1b1 100644 Binary files a/pandas/tests/io/data/test1.xlsm and b/pandas/tests/io/data/test1.xlsm differ diff --git a/pandas/tests/io/data/test1.xlsx b/pandas/tests/io/data/test1.xlsx index a437d838fe130..862574e05a114 100644 Binary files a/pandas/tests/io/data/test1.xlsx and b/pandas/tests/io/data/test1.xlsx differ diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 48fb6b705a4a4..579f39e21d3c1 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -38,13 +38,17 @@ class TestReaders: # Add any engines to test here pytest.param('xlrd', marks=pytest.mark.skipif( not td.safe_import("xlrd"), reason="no xlrd")), + pytest.param('openpyxl', marks=pytest.mark.skipif( + not td.safe_import("openpyxl"), reason="no openpyxl")), pytest.param(None, marks=pytest.mark.skipif( not td.safe_import("xlrd"), reason="no xlrd")), ]) - def cd_and_set_engine(self, request, datapath, monkeypatch): + def cd_and_set_engine(self, request, datapath, monkeypatch, read_ext): """ Change directory and set engine for read_excel calls. """ + if request.param == 'openpyxl' and read_ext == '.xls': + pytest.skip() func = partial(pd.read_excel, engine=request.param) monkeypatch.chdir(datapath("io", "data")) monkeypatch.setattr(pd, 'read_excel', func) @@ -397,6 +401,9 @@ def test_date_conversion_overflow(self, read_ext): [1e+20, 'Timothy Brown']], columns=['DateColWithBigInt', 'StringCol']) + if pd.read_excel.keywords['engine'] == 'openpyxl': + pytest.xfail("Maybe not supported by openpyxl") + result = pd.read_excel('testdateoverflow' + read_ext) tm.assert_frame_equal(result, expected) @@ -724,6 +731,8 @@ class TestExcelFileRead: # Add any engines to test here pytest.param('xlrd', marks=pytest.mark.skipif( not td.safe_import("xlrd"), reason="no xlrd")), + pytest.param('openpyxl', marks=pytest.mark.skipif( + not td.safe_import("openpyxl"), reason="no openpyxl")), pytest.param(None, marks=pytest.mark.skipif( not td.safe_import("xlrd"), reason="no xlrd")), ])