diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 249f08c7e387b..08ba9e7437f04 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -248,6 +248,7 @@ I/O ^^^ - Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raise a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`) - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`) +- Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`) Period ^^^^^^ diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 74cbe90acdae8..bc7dca2d95b6b 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -192,7 +192,15 @@ def _make_table_cell(self, cell) -> tuple[object, Any]: if isinstance(val, bool): value = str(val).lower() pvalue = str(val).upper() - if isinstance(val, datetime.datetime): + return ( + pvalue, + TableCell( + valuetype="boolean", + booleanvalue=value, + attributes=attributes, + ), + ) + elif isinstance(val, datetime.datetime): # Fast formatting value = val.isoformat() # Slow but locale-dependent @@ -210,17 +218,20 @@ def _make_table_cell(self, cell) -> tuple[object, Any]: pvalue, TableCell(valuetype="date", datevalue=value, attributes=attributes), ) + elif isinstance(val, str): + return ( + pvalue, + TableCell( + valuetype="string", + stringvalue=value, + attributes=attributes, + ), + ) else: - class_to_cell_type = { - str: "string", - int: "float", - float: "float", - bool: "boolean", - } return ( pvalue, TableCell( - valuetype=class_to_cell_type[type(val)], + valuetype="float", value=value, attributes=attributes, ), diff --git a/pandas/tests/io/excel/test_odswriter.py b/pandas/tests/io/excel/test_odswriter.py index 21d31ec8a7fb5..ecee58362f8a9 100644 --- a/pandas/tests/io/excel/test_odswriter.py +++ b/pandas/tests/io/excel/test_odswriter.py @@ -1,7 +1,12 @@ +from datetime import ( + date, + datetime, +) import re import pytest +import pandas as pd import pandas._testing as tm from pandas.io.excel import ExcelWriter @@ -47,3 +52,47 @@ def test_book_and_sheets_consistent(ext): table = odf.table.Table(name="test_name") writer.book.spreadsheet.addElement(table) assert writer.sheets == {"test_name": table} + + +@pytest.mark.parametrize( + ["value", "cell_value_type", "cell_value_attribute", "cell_value"], + argvalues=[ + (True, "boolean", "boolean-value", "true"), + ("test string", "string", "string-value", "test string"), + (1, "float", "value", "1"), + (1.5, "float", "value", "1.5"), + ( + datetime(2010, 10, 10, 10, 10, 10), + "date", + "date-value", + "2010-10-10T10:10:10", + ), + (date(2010, 10, 10), "date", "date-value", "2010-10-10"), + ], +) +def test_cell_value_type(ext, value, cell_value_type, cell_value_attribute, cell_value): + # GH#54994 ODS: cell attributes should follow specification + # http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13 + from odf.namespaces import OFFICENS + from odf.table import ( + TableCell, + TableRow, + ) + + table_cell_name = TableCell().qname + + with tm.ensure_clean(ext) as f: + pd.DataFrame([[value]]).to_excel(f, header=False, index=False) + + with pd.ExcelFile(f) as wb: + sheet = wb._reader.get_sheet_by_index(0) + sheet_rows = sheet.getElementsByType(TableRow) + sheet_cells = [ + x + for x in sheet_rows[0].childNodes + if hasattr(x, "qname") and x.qname == table_cell_name + ] + + cell = sheet_cells[0] + assert cell.attributes.get((OFFICENS, "value-type")) == cell_value_type + assert cell.attributes.get((OFFICENS, cell_value_attribute)) == cell_value diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index de444019e7b4c..8dd9f96a05a90 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -578,17 +578,11 @@ def test_reader_dtype_str(self, read_ext, dtype, expected): actual = pd.read_excel(basename + read_ext, dtype=dtype) tm.assert_frame_equal(actual, expected) - def test_dtype_backend(self, request, engine, read_ext, dtype_backend): + def test_dtype_backend(self, read_ext, dtype_backend): # GH#36712 if read_ext in (".xlsb", ".xls"): pytest.skip(f"No engine for filetype: '{read_ext}'") - # GH 54994 - if engine == "calamine" and read_ext == ".ods": - request.node.add_marker( - pytest.mark.xfail(reason="OdsWriter produces broken file") - ) - df = DataFrame( { "a": Series([1, 3], dtype="Int64"), @@ -629,17 +623,11 @@ def test_dtype_backend(self, request, engine, read_ext, dtype_backend): expected = df tm.assert_frame_equal(result, expected) - def test_dtype_backend_and_dtype(self, request, engine, read_ext): + def test_dtype_backend_and_dtype(self, read_ext): # GH#36712 if read_ext in (".xlsb", ".xls"): pytest.skip(f"No engine for filetype: '{read_ext}'") - # GH 54994 - if engine == "calamine" and read_ext == ".ods": - request.node.add_marker( - pytest.mark.xfail(reason="OdsWriter produces broken file") - ) - df = DataFrame({"a": [np.nan, 1.0], "b": [2.5, np.nan]}) with tm.ensure_clean(read_ext) as file_path: df.to_excel(file_path, sheet_name="test", index=False) @@ -651,17 +639,11 @@ def test_dtype_backend_and_dtype(self, request, engine, read_ext): ) tm.assert_frame_equal(result, df) - def test_dtype_backend_string(self, request, engine, read_ext, string_storage): + def test_dtype_backend_string(self, read_ext, string_storage): # GH#36712 if read_ext in (".xlsb", ".xls"): pytest.skip(f"No engine for filetype: '{read_ext}'") - # GH 54994 - if engine == "calamine" and read_ext == ".ods": - request.node.add_marker( - pytest.mark.xfail(reason="OdsWriter produces broken file") - ) - pa = pytest.importorskip("pyarrow") with pd.option_context("mode.string_storage", string_storage):