ENH: Add encoding_errors option in pandas.DataFrame.to_csv (pandas-de…

…v#27750) encoding_errors : str, default 'strict' Behavior when the input string can’t be converted according to the encoding’s rules (strict, ignore, replace, etc.) See: https://docs.python.org/3/library/codecs.html#codec-base-classes
shigemk2 · Sep 2, 2019 · b4f6929 · b4f6929
1 parent f8a924b
commit b4f6929
Show file tree

Hide file tree

Showing 6 changed files with 488 additions and 3 deletions.
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -1710,6 +1710,8 @@ function takes a number of arguments. Only the first is required.
   appropriate (default None)
 * ``chunksize``: Number of rows to write at a time
 * ``date_format``: Format string for datetime objects
+* ``encoding_errors``: Behavior when the input string can't be converted according to the encoding's rules (strict, ignore, replace, etc.)
+
+  .. versionadded:: 1.0.0
 
 Writing a formatted string
 ++++++++++++++++++++++++++

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -21,7 +21,7 @@ including other versions of pandas.
 Enhancements
 ~~~~~~~~~~~~
 
--
+- :meth:`DataFrame.to_csv` now accepts an ``encoding_errors`` option to control how encoding errors are handled (:issue:`27750`).
 -
 
 .. _whatsnew_1000.enhancements.other:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3084,6 +3084,7 @@ def to_csv(
         doublequote: bool_t = True,
         escapechar: Optional[str] = None,
         decimal: Optional[str] = ".",
+        encoding_errors: Optional[str] = "strict",
     ) -> Optional[str]:
         r"""
         Write object to a comma-separated values (csv) file.
@@ -3171,6 +3172,11 @@ def to_csv(
         decimal : str, default '.'
             Character recognized as decimal separator. E.g. use ',' for
             European data.
+        encoding_errors : str, default 'strict'
+            Behavior when the input string can't be converted according to
+            the encoding's rules ('strict', 'ignore', 'replace', etc.).
+            See: https://docs.python.org/3/library/codecs.html#codec-base-classes
+
+            .. versionadded:: 1.0.0
 
         Returns
         -------
@@ -3224,6 +3230,7 @@ def to_csv(
             doublequote=doublequote,
             escapechar=escapechar,
             decimal=decimal,
+            encoding_errors=encoding_errors,
         )
         formatter.save()
 

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -363,6 +363,7 @@ def _get_handle(
     compression: Optional[Union[str, Dict[str, Any]]] = None,
     memory_map: bool = False,
     is_text: bool = True,
+    encoding_errors: Optional[str] = "strict",
 ):
     """
     Get file handle for given path/buffer and mode.
@@ -395,6 +396,11 @@ def _get_handle(
     is_text : boolean, default True
         whether file/buffer is in text format (csv, json, etc.), or in binary
         mode (pickle, etc.).
+    encoding_errors : str, default 'strict'
+        Behavior when the input string can't be converted according to
+        the encoding's rules ('strict', 'ignore', 'replace', etc.).
+        See: https://docs.python.org/3/library/codecs.html#codec-base-classes
+
+        .. versionadded:: 1.0.0
 
     Returns
     -------
@@ -472,10 +478,12 @@ def _get_handle(
     elif is_path:
         if encoding:
             # Encoding
-            f = open(path_or_buf, mode, encoding=encoding, newline="")
+            f = open(
+                path_or_buf, mode, errors=encoding_errors, encoding=encoding, newline=""
+            )
         elif is_text:
             # No explicit encoding
-            f = open(path_or_buf, mode, errors="replace", newline="")
+            f = open(path_or_buf, mode, errors=encoding_errors, newline="")
         else:
             # Binary mode
             f = open(path_or_buf, mode)

diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
@@ -52,6 +52,7 @@ def __init__(
         doublequote=True,
         escapechar=None,
         decimal=".",
+        encoding_errors="strict",
     ):
 
         self.obj = obj
@@ -97,6 +98,8 @@ def __init__(
 
         self.has_mi_columns = isinstance(obj.columns, ABCMultiIndex)
 
+        self.encoding_errors = encoding_errors
+
         # validate mi options
         if self.has_mi_columns:
             if cols is not None:
@@ -183,6 +186,7 @@ def save(self):
                 self.mode,
                 encoding=self.encoding,
                 compression=dict(self.compression_args, method=self.compression),
+                encoding_errors=self.encoding_errors,
             )
             close = True