Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: remove encoding kwarg from read_stata, DataFrame.to_stata #29722

Merged
merged 4 commits into from
Nov 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`29608`)
- Removed previously deprecated "order" argument from :func:`factorize` (:issue:`19751`)
- Removed previously deprecated "v" argument from :meth:`FrozenNDarray.searchsorted`, use "value" instead (:issue:`22672`)
- :func:`read_stata` and :meth:`DataFrame.to_stata` no longer support the "encoding" argument (:issue:`21400`)
- Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`)
- Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`)
-
Expand Down
11 changes: 1 addition & 10 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,7 @@

from pandas._libs import algos as libalgos, lib
from pandas.compat.numpy import function as nv
from pandas.util._decorators import (
Appender,
Substitution,
deprecate_kwarg,
rewrite_axis_style_signature,
)
from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature
from pandas.util._validators import (
validate_axis_style_args,
validate_bool_kwarg,
Expand Down Expand Up @@ -1972,13 +1967,11 @@ def _from_arrays(cls, arrays, columns, index, dtype=None):
mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
return cls(mgr)

@deprecate_kwarg(old_arg_name="encoding", new_arg_name=None)
def to_stata(
self,
fname,
convert_dates=None,
write_index=True,
encoding="latin-1",
byteorder=None,
time_stamp=None,
data_label=None,
Expand Down Expand Up @@ -2008,8 +2001,6 @@ def to_stata(
a datetime column has timezone information.
write_index : bool
Write the index to Stata dataset.
encoding : str
Default is latin-1. Unicode is not supported.
byteorder : str
Can be ">", "<", "little", or "big". default is `sys.byteorder`.
time_stamp : datetime
Expand Down
18 changes: 0 additions & 18 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,6 @@
convert_categoricals : bool, default True
Read value labels and convert columns to Categorical/Factor variables."""

_encoding_params = """\
encoding : str, None or encoding
Encoding used to parse the files. None defaults to latin-1."""

_statafile_processing_params2 = """\
index_col : str, optional
Column to set as index.
Expand Down Expand Up @@ -108,7 +104,6 @@
%s
%s
%s
%s

Returns
-------
Expand All @@ -132,7 +127,6 @@
... do_something(chunk)
""" % (
_statafile_processing_params1,
_encoding_params,
_statafile_processing_params2,
_chunksize_params,
_iterator_params,
Expand Down Expand Up @@ -189,23 +183,19 @@
%s
%s
%s
%s
""" % (
_statafile_processing_params1,
_statafile_processing_params2,
_encoding_params,
_chunksize_params,
)


@Appender(_read_stata_doc)
@deprecate_kwarg(old_arg_name="encoding", new_arg_name=None)
@deprecate_kwarg(old_arg_name="index", new_arg_name="index_col")
def read_stata(
filepath_or_buffer,
convert_dates=True,
convert_categoricals=True,
encoding=None,
index_col=None,
convert_missing=False,
preserve_dtypes=True,
Expand Down Expand Up @@ -1044,7 +1034,6 @@ def __init__(self):
class StataReader(StataParser, BaseIterator):
__doc__ = _stata_reader_doc

@deprecate_kwarg(old_arg_name="encoding", new_arg_name=None)
@deprecate_kwarg(old_arg_name="index", new_arg_name="index_col")
def __init__(
self,
Expand All @@ -1056,7 +1045,6 @@ def __init__(
preserve_dtypes=True,
columns=None,
order_categoricals=True,
encoding=None,
chunksize=None,
):
super().__init__()
Expand Down Expand Up @@ -2134,14 +2122,12 @@ class StataWriter(StataParser):

_max_string_length = 244

@deprecate_kwarg(old_arg_name="encoding", new_arg_name=None)
def __init__(
self,
fname,
data,
convert_dates=None,
write_index=True,
encoding="latin-1",
byteorder=None,
time_stamp=None,
data_label=None,
Expand Down Expand Up @@ -2859,8 +2845,6 @@ class StataWriter117(StataWriter):
timezone information
write_index : bool
Write the index to Stata dataset.
encoding : str
Default is latin-1. Only latin-1 and ascii are supported.
byteorder : str
Can be ">", "<", "little", or "big". default is `sys.byteorder`
time_stamp : datetime
Expand Down Expand Up @@ -2912,14 +2896,12 @@ class StataWriter117(StataWriter):

_max_string_length = 2045

@deprecate_kwarg(old_arg_name="encoding", new_arg_name=None)
def __init__(
self,
fname,
data,
convert_dates=None,
write_index=True,
encoding="latin-1",
byteorder=None,
time_stamp=None,
data_label=None,
Expand Down
8 changes: 2 additions & 6 deletions pandas/tests/io/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,19 +383,15 @@ def test_encoding(self, version):

# GH 4626, proper encoding handling
raw = read_stata(self.dta_encoding)
with tm.assert_produces_warning(FutureWarning):
encoded = read_stata(self.dta_encoding, encoding="latin-1")
encoded = read_stata(self.dta_encoding)
result = encoded.kreis1849[0]

expected = raw.kreis1849[0]
assert result == expected
assert isinstance(result, str)

with tm.ensure_clean() as path:
with tm.assert_produces_warning(FutureWarning):
encoded.to_stata(
path, write_index=False, version=version, encoding="latin-1"
)
encoded.to_stata(path, write_index=False, version=version)
reread_encoded = read_stata(path)
tm.assert_frame_equal(encoded, reread_encoded)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if this is still testing anything useful

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea can probably just remove this altogether since it seems directed towards encoding

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there's also the version kwarg. @bashtage is this testing anything meaningful?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is testing correctness, but since encoding is already a no-op, I think it isn't adding anything. That said, I think it still uses some characters that aren't tested anywhere else.


Expand Down