Skip to content

Commit

Permalink
Add xlsxwriter to improve to_excel performance (#701)
Browse files Browse the repository at this point in the history
Co-authored-by: Daniel Huppmann <dh@dergelbesalon.at>
  • Loading branch information
phackstock and danielhuppmann authored Sep 14, 2022
1 parent 759120f commit fde3690
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 9 deletions.
6 changes: 6 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# Next Release

## Dependency changes

PR [#701](https://github.com/IAMconsortium/pyam/pull/701) added `xlsxwriter` as a
dependency to improve the performance of writing to Excel with `to_excel()`.

## Individual updates

- [#701](https://github.com/IAMconsortium/pyam/pull/701) Add **xlsxwriter** as a dependency to improve `to_excel()` performance
- [#699](https://github.com/IAMconsortium/pyam/pull/699) Add filter options to IIASA API `index()`, `meta()` and `properties()` methods
- [#697](https://github.com/IAMconsortium/pyam/pull/697) Add warning if IIASA API returns empty result
- [#695](https://github.com/IAMconsortium/pyam/pull/695) Remove unused meta levels during initialization
Expand Down
5 changes: 3 additions & 2 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2343,7 +2343,7 @@ def to_excel(
close = False
if not isinstance(excel_writer, pd.ExcelWriter):
close = True
excel_writer = pd.ExcelWriter(excel_writer, engine="openpyxl")
excel_writer = pd.ExcelWriter(excel_writer, engine="xlsxwriter")

# write data table
write_sheet(excel_writer, sheet_name, self._to_file_format(iamc_index))
Expand Down Expand Up @@ -2374,7 +2374,8 @@ def export_meta(self, excel_writer, sheet_name="meta"):
"""
close = False
if not isinstance(excel_writer, pd.ExcelWriter):
excel_writer, close = pd.ExcelWriter(excel_writer), True
excel_writer = pd.ExcelWriter(excel_writer, engine="xlsxwriter")
close = True
write_sheet(excel_writer, sheet_name, self.meta, index=True)
if close:
excel_writer.close()
Expand Down
7 changes: 1 addition & 6 deletions pyam/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,19 +124,14 @@ def write_sheet(writer, name, df, index=False):
if index:
df = df.reset_index()
df.to_excel(writer, name, index=False)
worksheet = writer.sheets[name]
for i, col in enumerate(df.columns):
if df.dtypes[col].name.startswith(("float", "int")):
width = len(str(col)) + 2
else:
width = (
max([df[col].map(lambda x: len(str(x or "None"))).max(), len(col)]) + 2
)
# this line fails if using an xlsx-engine other than openpyxl
try:
worksheet.column_dimensions[NUMERIC_TO_STR[i]].width = width
except AttributeError:
pass
writer.sheets[name].set_column(i, i, width) # assumes xlsxwriter as engine


def read_pandas(path, sheet_name=["data*", "Data*"], *args, **kwargs):
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ install_requires =
setuptools_scm
# required explicitly for Python 3.7
importlib_metadata
xlsxwriter
setup_requires =
setuptools >= 41
setuptools_scm
Expand Down
2 changes: 1 addition & 1 deletion tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def test_io_xlsx(test_df, meta_args, tmpdir):
def test_io_xlsx_multiple_data_sheets(test_df, sheets, sheetname, tmpdir):
# write data to separate sheets in excel file
file = tmpdir / "testing_io_write_read.xlsx"
xl = pd.ExcelWriter(file)
xl = pd.ExcelWriter(file, engine="xlsxwriter")
for i, (model, scenario) in enumerate(test_df.index):
test_df.filter(scenario=scenario).to_excel(xl, sheet_name=sheets[i])
test_df.export_meta(xl)
Expand Down

0 comments on commit fde3690

Please sign in to comment.