-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Openpyxl engine for reading excel files #25092
Changes from 83 commits
e29b4c0
e0199a8
ce4eb01
b25877e
821fa4d
4694668
712f1ef
1d49a0e
1473c0e
6e8ffba
d57dfc1
e984f6b
44f7af2
98d3865
d0188ba
205d52b
7b550bf
875de8d
12ad6d8
dfd6a36
fef7233
eaafd5f
8d2db02
13e7793
b053cce
fe4dd73
64e5f2d
99b2cad
ce5ac05
c7895ea
2ca9368
5fb1aef
537dd0c
44cddc5
e4c8f23
daff364
1224918
1bfc030
747311e
a77a4c7
ddcaad8
757235d
cdd627f
0b58109
45f21f8
e97d029
1edae5e
a69e104
f5f40e4
22e24bb
903b188
1b3ae99
02e19a8
3e18f97
d11956c
61d7a3f
13d41b2
97c85f5
614d972
d87d9c0
7348b0c
c1a1792
d72ca5a
0bba345
8dd8bf6
eaaa680
6bf5183
a06bf9b
f43e90f
8fabe0a
0ff5ce3
fb73692
17b1d73
3d248ed
c369fd8
70b15a4
a3a3bca
fcd43f0
d9c1fa6
3c239a4
4a25a5a
6258e59
00f34b1
a1fba90
88ee325
837ce26
dddc8c5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,12 @@ | ||
from pandas.io.excel._base import ExcelWriter | ||
from typing import List | ||
|
||
import numpy as np | ||
|
||
from pandas.compat._optional import import_optional_dependency | ||
|
||
from pandas._typing import FilePathOrBuffer, Scalar | ||
|
||
from pandas.io.excel._base import ExcelWriter, _BaseExcelReader | ||
from pandas.io.excel._util import _validate_freeze_panes | ||
|
||
|
||
|
@@ -451,3 +459,67 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, | |
xcell = wks.cell(column=col, row=row) | ||
for k, v in style_kwargs.items(): | ||
setattr(xcell, k, v) | ||
|
||
|
||
class _OpenpyxlReader(_BaseExcelReader): | ||
|
||
def __init__(self, filepath_or_buffer: FilePathOrBuffer) -> None:
    """
    Reader using the ``openpyxl`` engine.

    Parameters
    ----------
    filepath_or_buffer : string, path object or Workbook
        Object to be parsed.
    """
    # Look up the optional ``openpyxl`` dependency first, then hand the
    # input over to the base-class initializer.
    import_optional_dependency("openpyxl")
    super().__init__(filepath_or_buffer)
|
||
@property
def _workbook_class(self):
    """Return the ``openpyxl.Workbook`` class."""
    # Function-scope import: openpyxl is not imported when this module
    # is loaded, only when the property is read.
    import openpyxl

    return openpyxl.Workbook
|
||
def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
    """
    Open ``filepath_or_buffer`` with ``openpyxl.load_workbook``.

    The workbook is requested with ``read_only=True`` and
    ``data_only=True``.

    Parameters
    ----------
    filepath_or_buffer : FilePathOrBuffer
        Passed through unchanged as the first positional argument.

    Returns
    -------
    The object returned by ``openpyxl.load_workbook``.
    """
    # Function-scope import: openpyxl is only imported when a workbook
    # is actually loaded.
    import openpyxl

    options = {"read_only": True, "data_only": True}
    return openpyxl.load_workbook(filepath_or_buffer, **options)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. FWIW, you almost certainly want … [the rest of this review comment is missing from this capture] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
|
||
@property
def sheet_names(self) -> List[str]:
    """Return the ``sheetnames`` attribute of ``self.book``."""
    workbook = self.book
    return workbook.sheetnames
|
||
def get_sheet_by_name(self, name: str):
    """
    Return the sheet stored under ``name`` in ``self.book``.

    Parameters
    ----------
    name : str
        Key used to index ``self.book``.
    """
    workbook = self.book
    return workbook[name]
|
||
def get_sheet_by_index(self, index: int):
    """
    Return the sheet at position ``index`` of ``self.book.worksheets``.

    Parameters
    ----------
    index : int
        Position used to index ``self.book.worksheets``.
    """
    sheets = self.book.worksheets
    return sheets[index]
|
||
def _convert_cell(self, cell, convert_float: bool) -> Scalar: | ||
|
||
# TODO: replace with openpyxl constants | ||
if cell.is_date: | ||
return cell.value | ||
elif cell.data_type == 'e': | ||
return np.nan | ||
elif cell.data_type == 'b': | ||
return bool(cell.value) | ||
elif cell.value is None: | ||
return '' # compat with xlrd | ||
elif cell.data_type == 'n': | ||
# GH5394 | ||
if convert_float: | ||
val = int(cell.value) | ||
if val == cell.value: | ||
return val | ||
else: | ||
return float(cell.value) | ||
|
||
return cell.value | ||
|
||
def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
    """
    Convert every cell of ``sheet`` into a nested list of scalars.

    Parameters
    ----------
    sheet : object
        Worksheet whose ``rows`` attribute yields iterables of cells.
    convert_float : bool
        Forwarded to ``self._convert_cell`` for each cell.

    Returns
    -------
    list of list of Scalar
        One inner list per row, in iteration order.
    """
    return [
        [self._convert_cell(cell, convert_float) for cell in row]
        for row in sheet.rows
    ]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would put ``openpyxl`` in double backquotes, but if this is the only issue, then that can be done later