-
-
Notifications
You must be signed in to change notification settings - Fork 17.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DEPR: pd.read_table #21954
DEPR: pd.read_table #21954
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ | |
|
||
def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover | ||
r""" | ||
Read text from clipboard and pass to read_table. See read_table for the | ||
Read text from clipboard and pass to read_csv. See read_csv for the | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm...this is arguably breaking (the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same, maybe I am missing something, but I think the only difference between There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above. It should probably be fine. Don't worry about it. |
||
full argument list | ||
|
||
Parameters | ||
|
@@ -31,7 +31,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover | |
'reading from clipboard only supports utf-8 encoding') | ||
|
||
from pandas.io.clipboard import clipboard_get | ||
from pandas.io.parsers import read_table | ||
from pandas.io.parsers import read_csv | ||
text = clipboard_get() | ||
|
||
# try to decode (if needed on PY3) | ||
|
@@ -51,7 +51,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover | |
# that this came from excel and set 'sep' accordingly | ||
lines = text[:10000].split('\n')[:-1][:10] | ||
|
||
# Need to remove leading white space, since read_table | ||
# Need to remove leading white space, since read_csv | ||
# accepts: | ||
# a b | ||
# 0 1 2 | ||
|
@@ -80,7 +80,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover | |
if kwargs.get('engine') == 'python' and PY2: | ||
text = text.encode('utf-8') | ||
|
||
return read_table(StringIO(text), sep=sep, **kwargs) | ||
return read_csv(StringIO(text), sep=sep, **kwargs) | ||
|
||
|
||
def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -331,6 +331,10 @@ | |
""" % (_parser_params % (_sep_doc.format(default="','"), _engine_doc)) | ||
|
||
_read_table_doc = """ | ||
|
||
.. deprecated:: 0.24.0 | ||
Use :func:`pandas.read_csv` instead, passing ``sep='\t'`` if necessary. | ||
|
||
Read general delimited file into DataFrame | ||
|
||
%s | ||
|
@@ -540,9 +544,13 @@ def _read(filepath_or_buffer, kwds): | |
} | ||
|
||
|
||
def _make_parser_function(name, sep=','): | ||
def _make_parser_function(name, default_sep=','): | ||
|
||
default_sep = sep | ||
# prepare read_table deprecation | ||
if name == "read_table": | ||
sep = False | ||
else: | ||
sep = default_sep | ||
|
||
def parser_f(filepath_or_buffer, | ||
sep=sep, | ||
|
@@ -611,11 +619,24 @@ def parser_f(filepath_or_buffer, | |
memory_map=False, | ||
float_precision=None): | ||
|
||
# deprecate read_table GH21948 | ||
if name == "read_table": | ||
if sep is False and delimiter is None: | ||
warnings.warn("read_table is deprecated, use read_csv " | ||
"instead, passing sep='\\t'.", | ||
FutureWarning, stacklevel=2) | ||
else: | ||
warnings.warn("read_table is deprecated, use read_csv " | ||
"instead.", | ||
FutureWarning, stacklevel=2) | ||
if sep is False: | ||
sep = default_sep | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need this (yes, I see the other changes that you made for passing in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the user is setting More generally, the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm...I see. Okay, fair enough. |
||
|
||
# Alias sep -> delimiter. | ||
if delimiter is None: | ||
delimiter = sep | ||
|
||
if delim_whitespace and delimiter is not default_sep: | ||
if delim_whitespace and delimiter != default_sep: | ||
raise ValueError("Specified a delimiter with both sep and" | ||
" delim_whitespace=True; you can only" | ||
" specify one.") | ||
|
@@ -687,10 +708,10 @@ def parser_f(filepath_or_buffer, | |
return parser_f | ||
|
||
|
||
read_csv = _make_parser_function('read_csv', sep=',') | ||
read_csv = _make_parser_function('read_csv', default_sep=',') | ||
read_csv = Appender(_read_csv_doc)(read_csv) | ||
|
||
read_table = _make_parser_function('read_table', sep='\t') | ||
read_table = _make_parser_function('read_table', default_sep='\t') | ||
read_table = Appender(_read_table_doc)(read_table) | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -131,7 +131,6 @@ def test_iterator(self): | |
|
||
@pytest.mark.parametrize('reader, module, error_class, fn_ext', [ | ||
(pd.read_csv, 'os', FileNotFoundError, 'csv'), | ||
(pd.read_table, 'os', FileNotFoundError, 'csv'), | ||
(pd.read_fwf, 'os', FileNotFoundError, 'txt'), | ||
(pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'), | ||
(pd.read_feather, 'feather', Exception, 'feather'), | ||
|
@@ -149,9 +148,14 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): | |
with pytest.raises(error_class): | ||
reader(path) | ||
|
||
def test_read_non_existant_read_table(self): | ||
path = os.path.join(HERE, 'data', 'does_not_exist.' + 'csv') | ||
with pytest.raises(FileNotFoundError): | ||
with tm.assert_produces_warning(FutureWarning): | ||
pd.read_table(path) | ||
|
||
@pytest.mark.parametrize('reader, module, path', [ | ||
(pd.read_csv, 'os', ('io', 'data', 'iris.csv')), | ||
(pd.read_table, 'os', ('io', 'data', 'iris.csv')), | ||
(pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')), | ||
(pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')), | ||
(pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')), | ||
|
@@ -170,6 +174,22 @@ def test_read_fspath_all(self, reader, module, path, datapath): | |
mypath = CustomFSPath(path) | ||
result = reader(mypath) | ||
expected = reader(path) | ||
|
||
if path.endswith('.pickle'): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why was this change needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no change. These six lines repeat at the bottom of |
||
# categorical | ||
tm.assert_categorical_equal(result, expected) | ||
else: | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_read_fspath_all_read_table(self, datapath): | ||
path = datapath('io', 'data', 'iris.csv') | ||
|
||
mypath = CustomFSPath(path) | ||
with tm.assert_produces_warning(FutureWarning): | ||
result = pd.read_table(mypath) | ||
with tm.assert_produces_warning(FutureWarning): | ||
expected = pd.read_table(path) | ||
|
||
if path.endswith('.pickle'): | ||
# categorical | ||
tm.assert_categorical_equal(result, expected) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As it stands, this is a breaking API change.
@jreback : `0.21.0` deprecations are fair game to remove in `0.24.0`, no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As far as I know, the only difference between `read_csv` and `read_table` is the default value of `sep`. Here, `sep` is explicitly set, so I don't see how this changes anything (except moving away from `read_table`)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right: for the vast majority, it's not going to be breaking. It's only in a corner case situation (e.g. you pass in `None`) that you will see a change. This is fine then.