Skip to content

Commit

Permalink
ENH: Add encoding_errors option in pandas.DataFrame.to_csv (pandas-de…
Browse files Browse the repository at this point in the history
…v#27750)

encoding_errors : str, default 'strict'
Behavior when the input string can’t be converted according to
the encoding’s rules (strict, ignore, replace, etc.)
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
  • Loading branch information
shigemk2 committed Aug 19, 2019
1 parent 9d7a282 commit 55e572d
Show file tree
Hide file tree
Showing 6 changed files with 496 additions and 4 deletions.
4 changes: 4 additions & 0 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,10 @@ encoding : str, default ``None``
Encoding to use for UTF when reading/writing (e.g. ``'utf-8'``). `List of
Python standard encodings
<https://docs.python.org/3/library/codecs.html#standard-encodings>`_.
encoding_errors : str, default ``'strict'``
Behavior when the input string can’t be converted according to
the encoding’s rules (strict, ignore, replace, etc.)
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
dialect : str or :class:`python:csv.Dialect` instance, default ``None``
If provided, this parameter will override values (default or not) for the
following parameters: `delimiter`, `doublequote`, `escapechar`,
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ including other versions of pandas.
Enhancements
~~~~~~~~~~~~

-
- :meth:`DataFrame.to_csv` now accepts an ``encoding_errors`` option to control how encoding errors are handled (:issue:`27750`).
-

.. _whatsnew_1000.enhancements.other:
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3069,6 +3069,7 @@ def to_csv(
doublequote=True,
escapechar=None,
decimal=".",
encoding_errors="strict",
):
r"""
Write object to a comma-separated values (csv) file.
Expand Down Expand Up @@ -3151,6 +3152,11 @@ def to_csv(
decimal : str, default '.'
Character recognized as decimal separator. E.g. use ',' for
European data.
encoding_errors : str, default 'strict'
Behavior when the input string can’t be converted according to
the encoding’s rules (strict, ignore, replace, etc.)
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
.. versionadded:: 1.0.0
Returns
-------
Expand Down Expand Up @@ -3197,6 +3203,7 @@ def to_csv(
doublequote=doublequote,
escapechar=escapechar,
decimal=decimal,
encoding_errors=encoding_errors,
)
formatter.save()

Expand Down
19 changes: 16 additions & 3 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,13 @@ def _infer_compression(


def _get_handle(
path_or_buf, mode, encoding=None, compression=None, memory_map=False, is_text=True
path_or_buf,
mode,
encoding=None,
compression=None,
memory_map=False,
is_text=True,
encoding_errors="strict",
):
"""
Get file handle for given path/buffer and mode.
Expand All @@ -331,6 +337,11 @@ def _get_handle(
is_text : boolean, default True
whether file/buffer is in text format (csv, json, etc.), or in binary
mode (pickle, etc.)
encoding_errors : str, default 'strict'
Behavior when the input string can’t be converted according to
the encoding’s rules (strict, ignore, replace, etc.)
See: https://docs.python.org/3/library/codecs.html#codec-base-classes
.. versionadded:: 1.0.0
Returns
-------
Expand Down Expand Up @@ -407,10 +418,12 @@ def _get_handle(
elif is_path:
if encoding:
# Encoding
f = open(path_or_buf, mode, encoding=encoding, newline="")
f = open(
path_or_buf, mode, errors=encoding_errors, encoding=encoding, newline=""
)
elif is_text:
# No explicit encoding
f = open(path_or_buf, mode, errors="replace", newline="")
f = open(path_or_buf, mode, errors=encoding_errors, newline="")
else:
# Binary mode
f = open(path_or_buf, mode)
Expand Down
4 changes: 4 additions & 0 deletions pandas/io/formats/csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def __init__(
doublequote=True,
escapechar=None,
decimal=".",
encoding_errors="strict",
):

self.obj = obj
Expand Down Expand Up @@ -93,6 +94,8 @@ def __init__(

self.has_mi_columns = isinstance(obj.columns, ABCMultiIndex)

self.encoding_errors = encoding_errors

# validate mi options
if self.has_mi_columns:
if cols is not None:
Expand Down Expand Up @@ -179,6 +182,7 @@ def save(self):
self.mode,
encoding=self.encoding,
compression=self.compression,
encoding_errors=self.encoding_errors,
)
close = True

Expand Down
Loading

0 comments on commit 55e572d

Please sign in to comment.