Skip to content

Commit

Permalink
ENH: Add encoding_errors option in pandas.DataFrame.to_csv (pandas-de…
Browse files Browse the repository at this point in the history
…v#27750)

encoding_errors : str, default 'strict'
Behavior when the input string can’t be converted according to
the encoding’s rules (strict, ignore, replace, etc.)
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
  • Loading branch information
shigemk2 committed Sep 2, 2019
1 parent f8a924b commit b4f6929
Show file tree
Hide file tree
Showing 6 changed files with 488 additions and 3 deletions.
2 changes: 2 additions & 0 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1710,6 +1710,8 @@ function takes a number of arguments. Only the first is required.
appropriate (default None)
* ``chunksize``: Number of rows to write at a time
* ``date_format``: Format string for datetime objects
* ``encoding_errors``: How to handle strings that cannot be encoded according to the rules of the chosen ``encoding`` (``'strict'``, ``'ignore'``, ``'replace'``, etc.; default ``'strict'``).
.. versionadded:: 1.0.0

Writing a formatted string
++++++++++++++++++++++++++
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ including other versions of pandas.
Enhancements
~~~~~~~~~~~~

-
- :meth:`DataFrame.to_csv` now accepts an ``encoding_errors`` option to control how encoding errors are handled (:issue:`27750`).
-

.. _whatsnew_1000.enhancements.other:
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3084,6 +3084,7 @@ def to_csv(
doublequote: bool_t = True,
escapechar: Optional[str] = None,
decimal: Optional[str] = ".",
encoding_errors: Optional[str] = "strict",
) -> Optional[str]:
r"""
Write object to a comma-separated values (csv) file.
Expand Down Expand Up @@ -3171,6 +3172,11 @@ def to_csv(
decimal : str, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data.
encoding_errors : str, default 'strict'
Behavior when the input string can’t be converted according to
the encoding’s rules (strict, ignore, replace, etc.)
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
.. versionadded:: 1.0.0
Returns
-------
Expand Down Expand Up @@ -3224,6 +3230,7 @@ def to_csv(
doublequote=doublequote,
escapechar=escapechar,
decimal=decimal,
encoding_errors=encoding_errors,
)
formatter.save()

Expand Down
12 changes: 10 additions & 2 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ def _get_handle(
compression: Optional[Union[str, Dict[str, Any]]] = None,
memory_map: bool = False,
is_text: bool = True,
encoding_errors: Optional[str] = "strict",
):
"""
Get file handle for given path/buffer and mode.
Expand Down Expand Up @@ -395,6 +396,11 @@ def _get_handle(
is_text : boolean, default True
whether file/buffer is in text format (csv, json, etc.), or in binary
mode (pickle, etc.).
encoding_errors : str, default 'strict'
Behavior when the input string can’t be converted according to
the encoding’s rules (strict, ignore, replace, etc.)
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
.. versionadded:: 1.0.0
Returns
-------
Expand Down Expand Up @@ -472,10 +478,12 @@ def _get_handle(
elif is_path:
if encoding:
# Encoding
f = open(path_or_buf, mode, encoding=encoding, newline="")
f = open(
path_or_buf, mode, errors=encoding_errors, encoding=encoding, newline=""
)
elif is_text:
# No explicit encoding
f = open(path_or_buf, mode, errors="replace", newline="")
f = open(path_or_buf, mode, errors=encoding_errors, newline="")
else:
# Binary mode
f = open(path_or_buf, mode)
Expand Down
4 changes: 4 additions & 0 deletions pandas/io/formats/csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def __init__(
doublequote=True,
escapechar=None,
decimal=".",
encoding_errors="strict",
):

self.obj = obj
Expand Down Expand Up @@ -97,6 +98,8 @@ def __init__(

self.has_mi_columns = isinstance(obj.columns, ABCMultiIndex)

self.encoding_errors = encoding_errors

# validate mi options
if self.has_mi_columns:
if cols is not None:
Expand Down Expand Up @@ -183,6 +186,7 @@ def save(self):
self.mode,
encoding=self.encoding,
compression=dict(self.compression_args, method=self.compression),
encoding_errors=self.encoding_errors,
)
close = True

Expand Down
Loading

0 comments on commit b4f6929

Please sign in to comment.