diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index a0076118a28a75..c5b081c4ac8349 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -319,6 +319,7 @@ Deprecations
 - :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`).
 - :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
 - :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
+- :func:`pandas.read_table` is deprecated. Use ``pandas.read_csv`` instead (:issue:`21948`)
 -
 
 .. _whatsnew_0240.prior_deprecations:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4578d2ac08199a..6065d9cd0aaf67 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1592,8 +1592,8 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True,
                       "for from_csv when changing your function calls",
                       FutureWarning, stacklevel=2)
 
-        from pandas.io.parsers import read_table
-        return read_table(path, header=header, sep=sep,
+        from pandas.io.parsers import read_csv
+        return read_csv(path, header=header, sep=sep,
                           parse_dates=parse_dates, index_col=index_col,
                           encoding=encoding, tupleize_cols=tupleize_cols,
                           infer_datetime_format=infer_datetime_format)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 486040fa52f35a..94b4fd2fce1bab 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -326,6 +326,10 @@
 """ % (_parser_params % (_sep_doc.format(default="','"), _engine_doc))
 
 _read_table_doc = """
+
+.. deprecated:: 0.24.0
+    Use :func:`pandas.read_csv` instead, passing ``sep='\t'`` if necessary.
+
 Read general delimited file into DataFrame
 
 %s
@@ -539,6 +543,10 @@ def _make_parser_function(name, sep=','):
 
     default_sep = sep
 
+    # prepare read_table deprecation
+    if name == "read_table":
+        sep = False
+
     def parser_f(filepath_or_buffer,
                  sep=sep,
                  delimiter=None,
@@ -606,6 +614,19 @@ def parser_f(filepath_or_buffer,
                  memory_map=False,
                  float_precision=None):
 
+        # deprecate read_table
+        if name == "read_table":
+            if sep is False and delimiter is None:
+                warnings.warn("read_table is deprecated, use read_csv "
+                              "with sep='\\t' instead.",
+                              FutureWarning, stacklevel=2)
+            else:
+                warnings.warn("read_table is deprecated, use read_csv "
+                              "instead.",
+                              FutureWarning, stacklevel=2)
+            if sep is False:
+                sep = '\t'
+
         # Alias sep -> delimiter.
         if delimiter is None:
             delimiter = sep
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index 191e3f37f1c37a..21bc76d71097bd 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -1225,8 +1225,9 @@ def test_to_string(self):
         lines = result.split('\n')
         header = lines[0].strip().split()
         joined = '\n'.join(re.sub(r'\s+', ' ', x).strip() for x in lines[1:])
-        recons = read_table(StringIO(joined), names=header,
-                            header=None, sep=' ')
+        with tm.assert_produces_warning(FutureWarning):
+            recons = read_table(StringIO(joined), names=header,
+                                header=None, sep=' ')
         tm.assert_series_equal(recons['B'], biggie['B'])
         assert recons['A'].count() == biggie['A'].count()
         assert (np.abs(recons['A'].dropna() -
diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py
index 9dc7b070f889d4..eb7d5bc848caae 100644
--- a/pandas/tests/io/parser/c_parser_only.py
+++ b/pandas/tests/io/parser/c_parser_only.py
@@ -34,7 +34,8 @@ def test_buffer_overflow(self, malf):
         # buffer overflows in tokenizer.c
         cperr = 'Buffer overflow caught - possible malformed input file.'
         with pytest.raises(pd.errors.ParserError) as excinfo:
-            self.read_table(StringIO(malf))
+            with tm.assert_produces_warning(FutureWarning):
+                self.read_table(StringIO(malf))
         assert cperr in str(excinfo.value)
 
     def test_buffer_rd_bytes(self):
diff --git a/pandas/tests/io/parser/comment.py b/pandas/tests/io/parser/comment.py
index 9987a017cf9858..c6bf057178a542 100644
--- a/pandas/tests/io/parser/comment.py
+++ b/pandas/tests/io/parser/comment.py
@@ -24,8 +24,9 @@ def test_comment(self):
         df = self.read_csv(StringIO(data), comment='#')
         tm.assert_numpy_array_equal(df.values, expected)
 
-        df = self.read_table(StringIO(data), sep=',', comment='#',
-                             na_values=['NaN'])
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data), sep=',', comment='#',
+                                 na_values=['NaN'])
         tm.assert_numpy_array_equal(df.values, expected)
 
     def test_line_comment(self):
diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index 9e871d27f0ce8a..e45f452d480fc4 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -94,7 +94,8 @@ def test_1000_sep(self):
         df = self.read_csv(StringIO(data), sep='|', thousands=',')
         tm.assert_frame_equal(df, expected)
 
-        df = self.read_table(StringIO(data), sep='|', thousands=',')
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data), sep='|', thousands=',')
         tm.assert_frame_equal(df, expected)
 
     def test_squeeze(self):
@@ -105,8 +106,9 @@ def test_squeeze(self):
         """
         idx = Index(['a', 'b', 'c'], name=0)
         expected = Series([1, 2, 3], name=1, index=idx)
-        result = self.read_table(StringIO(data), sep=',', index_col=0,
-                                 header=None, squeeze=True)
+        with tm.assert_produces_warning(FutureWarning):
+            result = self.read_table(StringIO(data), sep=',', index_col=0,
+                                     header=None, squeeze=True)
 
         assert isinstance(result, Series)
         tm.assert_series_equal(result, expected)
@@ -129,8 +131,9 @@ def test_malformed(self):
         """
         msg = 'Expected 3 fields in line 4, saw 5'
         with tm.assert_raises_regex(Exception, msg):
-            self.read_table(StringIO(data), sep=',',
-                            header=1, comment='#')
+            with tm.assert_produces_warning(FutureWarning):
+                self.read_table(StringIO(data), sep=',',
+                                header=1, comment='#')
 
         # first chunk
         data = """ignore
@@ -143,10 +146,11 @@ def test_malformed(self):
         """
         msg = 'Expected 3 fields in line 6, saw 5'
         with tm.assert_raises_regex(Exception, msg):
-            it = self.read_table(StringIO(data), sep=',',
-                                 header=1, comment='#',
-                                 iterator=True, chunksize=1,
-                                 skiprows=[2])
+            with tm.assert_produces_warning(FutureWarning):
+                it = self.read_table(StringIO(data), sep=',',
+                                     header=1, comment='#',
+                                     iterator=True, chunksize=1,
+                                     skiprows=[2])
             it.read(5)
 
         # middle chunk
@@ -160,9 +164,10 @@ def test_malformed(self):
         """
         msg = 'Expected 3 fields in line 6, saw 5'
         with tm.assert_raises_regex(Exception, msg):
-            it = self.read_table(StringIO(data), sep=',', header=1,
-                                 comment='#', iterator=True, chunksize=1,
-                                 skiprows=[2])
+            with tm.assert_produces_warning(FutureWarning):
+                it = self.read_table(StringIO(data), sep=',', header=1,
+                                     comment='#', iterator=True, chunksize=1,
+                                     skiprows=[2])
             it.read(3)
 
         # last chunk
@@ -176,9 +181,10 @@ def test_malformed(self):
         """
         msg = 'Expected 3 fields in line 6, saw 5'
         with tm.assert_raises_regex(Exception, msg):
-            it = self.read_table(StringIO(data), sep=',', header=1,
-                                 comment='#', iterator=True, chunksize=1,
-                                 skiprows=[2])
+            with tm.assert_produces_warning(FutureWarning):
+                it = self.read_table(StringIO(data), sep=',', header=1,
+                                     comment='#', iterator=True, chunksize=1,
+                                     skiprows=[2])
             it.read()
 
         # skipfooter is not supported with the C parser yet
@@ -193,9 +199,10 @@ def test_malformed(self):
         """
         msg = 'Expected 3 fields in line 4, saw 5'
         with tm.assert_raises_regex(Exception, msg):
-            self.read_table(StringIO(data), sep=',',
-                            header=1, comment='#',
-                            skipfooter=1)
+            with tm.assert_produces_warning(FutureWarning):
+                self.read_table(StringIO(data), sep=',',
+                                header=1, comment='#',
+                                skipfooter=1)
 
     def test_quoting(self):
         bad_line_small = """printer\tresult\tvariant_name
@@ -204,11 +211,13 @@
 Klosterdruckerei\tKlosterdruckerei (1609-1805)\t"Furststiftische Hofdruckerei,  (1609-1805)\tGaller, Alois
 Klosterdruckerei\tKlosterdruckerei (1609-1805)\tHochfurstliche Buchhandlung
 """  # noqa
-        pytest.raises(Exception, self.read_table, StringIO(bad_line_small),
-                      sep='\t')
+        with pytest.raises(Exception):
+            with tm.assert_produces_warning(FutureWarning):
+                self.read_table(StringIO(bad_line_small), sep='\t')
 
         good_line_small = bad_line_small + '"'
-        df = self.read_table(StringIO(good_line_small), sep='\t')
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(good_line_small), sep='\t')
         assert len(df) == 3
 
     def test_unnamed_columns(self):
@@ -220,7 +229,8 @@ def test_unnamed_columns(self):
         expected = np.array([[1, 2, 3, 4, 5],
                              [6, 7, 8, 9, 10],
                              [11, 12, 13, 14, 15]], dtype=np.int64)
-        df = self.read_table(StringIO(data), sep=',')
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data), sep=',')
         tm.assert_almost_equal(df.values, expected)
         tm.assert_index_equal(df.columns,
                               Index(['A', 'B', 'C', 'Unnamed: 3',
@@ -255,8 +265,9 @@ def test_read_csv_low_memory_no_rows_with_index(self):
 
     def test_read_csv_dataframe(self):
         df = self.read_csv(self.csv1, index_col=0, parse_dates=True)
-        df2 = self.read_table(self.csv1, sep=',', index_col=0,
-                              parse_dates=True)
+        with tm.assert_produces_warning(FutureWarning):
+            df2 = self.read_table(self.csv1, sep=',', index_col=0,
+                                  parse_dates=True)
         tm.assert_index_equal(df.columns, pd.Index(['A', 'B', 'C', 'D']))
         assert df.index.name == 'index'
         assert isinstance(
@@ -266,8 +277,9 @@
 
     def test_read_csv_no_index_name(self):
         df = self.read_csv(self.csv2, index_col=0, parse_dates=True)
-        df2 = self.read_table(self.csv2, sep=',', index_col=0,
-                              parse_dates=True)
+        with tm.assert_produces_warning(FutureWarning):
+            df2 = self.read_table(self.csv2, sep=',', index_col=0,
+                                  parse_dates=True)
         tm.assert_index_equal(df.columns,
                               pd.Index(['A', 'B', 'C', 'D', 'E']))
         assert isinstance(df.index[0], (datetime, np.datetime64, Timestamp))
@@ -276,7 +288,8 @@ def test_read_csv_no_index_name(self):
 
     def test_read_table_unicode(self):
         fin = BytesIO(u('\u0141aski, Jan;1').encode('utf-8'))
-        df1 = self.read_table(fin, sep=";", encoding="utf-8", header=None)
+        with tm.assert_produces_warning(FutureWarning):
+            df1 = self.read_table(fin, sep=";", encoding="utf-8", header=None)
         assert isinstance(df1[0].values[0], compat.text_type)
 
     def test_read_table_wrong_num_columns(self):
@@ -303,9 +316,11 @@ def test_read_duplicate_index_explicit(self):
             'index', verify_integrity=False)
         tm.assert_frame_equal(result, expected)
 
-        result = self.read_table(StringIO(data), sep=',', index_col=0)
-        expected = self.read_table(StringIO(data), sep=',', ).set_index(
-            'index', verify_integrity=False)
+        with tm.assert_produces_warning(FutureWarning):
+            result = self.read_table(StringIO(data), sep=',', index_col=0)
+        with tm.assert_produces_warning(FutureWarning):
+            expected = self.read_table(StringIO(data), sep=',', ).set_index(
+                'index', verify_integrity=False)
         tm.assert_frame_equal(result, expected)
 
     def test_read_duplicate_index_implicit(self):
@@ -320,7 +335,8 @@ def test_read_duplicate_index_implicit(self):
 
         # make sure an error isn't thrown
         self.read_csv(StringIO(data))
-        self.read_table(StringIO(data), sep=',')
+        with tm.assert_produces_warning(FutureWarning):
+            self.read_table(StringIO(data), sep=',')
 
     def test_parse_bools(self):
         data = """A,B
@@ -512,8 +528,9 @@ def test_iterator(self):
         chunks = list(parser)
         tm.assert_frame_equal(chunks[0], df[1:3])
 
-        treader = self.read_table(StringIO(self.data1), sep=',', index_col=0,
-                                  iterator=True)
+        with tm.assert_produces_warning(FutureWarning):
+            treader = self.read_table(StringIO(self.data1), sep=',',
+                                      index_col=0, iterator=True)
         assert isinstance(treader, TextFileReader)
 
         # gh-3967: stopping iteration when chunksize is specified
@@ -636,7 +653,8 @@ def test_no_unnamed_index(self):
 1 2 0 c d
 2 2 2 e f
 """
-        df = self.read_table(StringIO(data), sep=' ')
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data), sep=' ')
         assert df.index.name is None
 
     def test_read_csv_parse_simple_list(self):
@@ -655,19 +673,23 @@ def test_url(self, datapath):
         # HTTP(S)
         url = ('https://raw.github.com/pandas-dev/pandas/master/'
                'pandas/tests/io/parser/data/salaries.csv')
-        url_table = self.read_table(url)
+        with tm.assert_produces_warning(FutureWarning):
+            url_table = self.read_table(url)
         localtable = datapath('io', 'parser', 'data', 'salaries.csv')
-        local_table = self.read_table(localtable)
+        with tm.assert_produces_warning(FutureWarning):
+            local_table = self.read_table(localtable)
         tm.assert_frame_equal(url_table, local_table)
         # TODO: ftp testing
 
     @pytest.mark.slow
     def test_file(self, datapath):
         localtable = datapath('io', 'parser', 'data', 'salaries.csv')
-        local_table = self.read_table(localtable)
+        with tm.assert_produces_warning(FutureWarning):
+            local_table = self.read_table(localtable)
 
         try:
-            url_table = self.read_table('file://localhost/' + localtable)
+            with tm.assert_produces_warning(FutureWarning):
+                url_table = self.read_table('file://localhost/' + localtable)
         except URLError:
             # fails on some systems
             pytest.skip("failing on %s" %
@@ -757,12 +779,14 @@ def test_utf16_example(self, datapath):
         path = datapath('io', 'parser', 'data', 'utf16_ex.txt')
 
         # it works! and is the right length
-        result = self.read_table(path, encoding='utf-16')
+        with tm.assert_produces_warning(FutureWarning):
+            result = self.read_table(path, encoding='utf-16')
         assert len(result) == 50
 
         if not compat.PY3:
             buf = BytesIO(open(path, 'rb').read())
-            result = self.read_table(buf, encoding='utf-16')
+            with tm.assert_produces_warning(FutureWarning):
+                result = self.read_table(buf, encoding='utf-16')
             assert len(result) == 50
 
     def test_unicode_encoding(self, datapath):
@@ -887,7 +911,8 @@ def test_catch_too_many_names(self):
     def test_ignore_leading_whitespace(self):
         # see gh-3374, gh-6607
         data = ' a b c\n 1 2 3\n 4 5 6\n 7 8 9'
-        result = self.read_table(StringIO(data), sep=r'\s+')
+        with tm.assert_produces_warning(FutureWarning):
+            result = self.read_table(StringIO(data), sep=r'\s+')
         expected = DataFrame({'a': [1, 4, 7], 'b': [2, 5, 8], 'c': [3, 6, 9]})
         tm.assert_frame_equal(result, expected)
 
@@ -1146,18 +1171,21 @@ def test_trailing_spaces(self):
                            header=None, delim_whitespace=True,
                            skiprows=[0, 1, 2, 3, 5, 6], skip_blank_lines=True)
         tm.assert_frame_equal(df, expected)
-        df = self.read_table(StringIO(data.replace(',', '  ')),
-                             header=None, delim_whitespace=True,
-                             skiprows=[0, 1, 2, 3, 5, 6],
-                             skip_blank_lines=True)
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data.replace(',', '  ')),
+                                 header=None, delim_whitespace=True,
+                                 skiprows=[0, 1, 2, 3, 5, 6],
+                                 skip_blank_lines=True)
         tm.assert_frame_equal(df, expected)
 
         # gh-8983: test skipping set of rows after a row with trailing spaces
         expected = DataFrame({"A": [1., 5.1], "B": [2., np.nan],
                               "C": [4., 10]})
-        df = self.read_table(StringIO(data.replace(',', '  ')),
-                             delim_whitespace=True,
-                             skiprows=[1, 2, 3, 5, 6], skip_blank_lines=True)
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data.replace(',', '  ')),
+                                 delim_whitespace=True,
+                                 skiprows=[1, 2, 3, 5, 6],
+                                 skip_blank_lines=True)
         tm.assert_frame_equal(df, expected)
 
     def test_raise_on_sep_with_delim_whitespace(self):
@@ -1165,7 +1193,9 @@ def test_raise_on_sep_with_delim_whitespace(self):
         data = 'a b c\n1 2 3'
         with tm.assert_raises_regex(ValueError,
                                     'you can only specify one'):
-            self.read_table(StringIO(data), sep=r'\s', delim_whitespace=True)
+            with tm.assert_produces_warning(FutureWarning):
+                self.read_table(StringIO(data), sep=r'\s',
+                                delim_whitespace=True)
 
     def test_single_char_leading_whitespace(self):
         # see gh-9710
@@ -1232,14 +1262,16 @@ def test_regex_separator(self):
 b   1   2   3   4
 c   1   2   3   4
 """
-        df = self.read_table(StringIO(data), sep=r'\s+')
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data), sep=r'\s+')
         expected = self.read_csv(StringIO(re.sub('[ ]+', ',', data)),
                                  index_col=0)
         assert expected.index.name is None
         tm.assert_frame_equal(df, expected)
 
         data = ' a b c\n1 2 3 \n4 5 6\n 7 8 9'
-        result = self.read_table(StringIO(data), sep=r'\s+')
+        with tm.assert_produces_warning(FutureWarning):
+            result = self.read_table(StringIO(data), sep=r'\s+')
         expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                              columns=['a', 'b', 'c'])
         tm.assert_frame_equal(result, expected)
@@ -1305,7 +1337,8 @@ def test_iteration_open_handle(self):
                 pytest.raises(Exception, self.read_table,
                               f, squeeze=True, header=None)
             else:
-                result = self.read_table(f, squeeze=True, header=None)
+                with tm.assert_produces_warning(FutureWarning):
+                    result = self.read_table(f, squeeze=True, header=None)
                 expected = Series(['DDD', 'EEE', 'FFF', 'GGG'],
                                   name=0)
                 tm.assert_series_equal(result, expected)
@@ -1327,8 +1360,9 @@ def test_1000_sep_with_decimal(self):
         df = self.read_csv(StringIO(data), sep='|', thousands=',',
                            decimal='.')
         tm.assert_frame_equal(df, expected)
 
-        df = self.read_table(StringIO(data), sep='|',
-                             thousands=',', decimal='.')
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data), sep='|',
+                                 thousands=',', decimal='.')
         tm.assert_frame_equal(df, expected)
 
         data_with_odd_sep = """A|B|C
@@ -1339,8 +1373,9 @@ def test_1000_sep_with_decimal(self):
 1|2.334,01|5
 10|13|10,
 """
         df = self.read_csv(StringIO(data_with_odd_sep),
                            sep='|', thousands='.', decimal=',')
         tm.assert_frame_equal(df, expected)
 
-        df = self.read_table(StringIO(data_with_odd_sep),
-                             sep='|', thousands='.', decimal=',')
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data_with_odd_sep),
+                                 sep='|', thousands='.', decimal=',')
         tm.assert_frame_equal(df, expected)
 
     def test_euro_decimal_format(self):
diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py
index 8060ebf2fbcd41..bd5c5b8fde330d 100644
--- a/pandas/tests/io/parser/dtypes.py
+++ b/pandas/tests/io/parser/dtypes.py
@@ -137,9 +137,11 @@ def test_categorical_dtype_encoding(self, datapath):
 
         pth = datapath('io', 'parser', 'data', 'utf16_ex.txt')
         encoding = 'utf-16'
-        expected = self.read_table(pth, encoding=encoding)
+        with tm.assert_produces_warning(FutureWarning):
+            expected = self.read_table(pth, encoding=encoding)
         expected = expected.apply(Categorical)
-        actual = self.read_table(pth, encoding=encoding, dtype='category')
+        with tm.assert_produces_warning(FutureWarning):
+            actual = self.read_table(pth, encoding=encoding, dtype='category')
         tm.assert_frame_equal(actual, expected)
 
     def test_categorical_dtype_chunksize(self):
diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py
index 3fb0650348763c..a4ff15d838548c 100644
--- a/pandas/tests/io/parser/header.py
+++ b/pandas/tests/io/parser/header.py
@@ -35,15 +35,17 @@ def test_bool_header_arg(self):
             with pytest.raises(TypeError):
                 self.read_csv(StringIO(data), header=arg)
             with pytest.raises(TypeError):
-                self.read_table(StringIO(data), header=arg)
+                with tm.assert_produces_warning(FutureWarning):
+                    self.read_table(StringIO(data), header=arg)
 
     def test_no_header_prefix(self):
         data = """1,2,3,4,5
 6,7,8,9,10
 11,12,13,14,15
 """
-        df_pref = self.read_table(StringIO(data), sep=',', prefix='Field',
-                                  header=None)
+        with tm.assert_produces_warning(FutureWarning):
+            df_pref = self.read_table(StringIO(data), sep=',',
+                                      prefix='Field', header=None)
 
         expected = np.array([[1, 2, 3, 4, 5],
                              [6, 7, 8, 9, 10],
@@ -250,12 +252,15 @@ def test_no_header(self):
 6,7,8,9,10
 11,12,13,14,15
 """
-        df = self.read_table(StringIO(data), sep=',', header=None)
-        df_pref = self.read_table(StringIO(data), sep=',', prefix='X',
-                                  header=None)
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data), sep=',', header=None)
+        with tm.assert_produces_warning(FutureWarning):
+            df_pref = self.read_table(StringIO(data), sep=',', prefix='X',
+                                      header=None)
 
         names = ['foo', 'bar', 'baz', 'quux', 'panda']
-        df2 = self.read_table(StringIO(data), sep=',', names=names)
+        with tm.assert_produces_warning(FutureWarning):
+            df2 = self.read_table(StringIO(data), sep=',', names=names)
         expected = np.array([[1, 2, 3, 4, 5],
                              [6, 7, 8, 9, 10],
                              [11, 12, 13, 14, 15]], dtype=np.int64)
diff --git a/pandas/tests/io/parser/mangle_dupes.py b/pandas/tests/io/parser/mangle_dupes.py
index 6df69eb475bf76..fca1d7addf772b 100644
--- a/pandas/tests/io/parser/mangle_dupes.py
+++ b/pandas/tests/io/parser/mangle_dupes.py
@@ -18,15 +18,23 @@ def test_basic(self):
         # once it is actually supported (gh-12935)
         data = "a,a,b,b,b\n1,2,3,4,5"
 
-        for method in ("read_csv", "read_table"):
-            # Check default behavior.
-            expected = ["a", "a.1", "b", "b.1", "b.2"]
-            df = getattr(self, method)(StringIO(data), sep=",")
-            assert list(df.columns) == expected
-
-            df = getattr(self, method)(StringIO(data), sep=",",
-                                       mangle_dupe_cols=True)
-            assert list(df.columns) == expected
+        # Check read_csv default behavior.
+        expected = ["a", "a.1", "b", "b.1", "b.2"]
+        df = self.read_csv(StringIO(data), sep=",")
+        assert list(df.columns) == expected
+        df = self.read_csv(StringIO(data), sep=",",
+                           mangle_dupe_cols=True)
+        assert list(df.columns) == expected
+
+        # Check read_table default behavior.
+        expected = ["a", "a.1", "b", "b.1", "b.2"]
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data), sep=",")
+        assert list(df.columns) == expected
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(data), sep=",",
+                                 mangle_dupe_cols=True)
+        assert list(df.columns) == expected
 
     def test_basic_names(self):
         # See gh-7160
diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py
index d2c3f82e95c4dd..257dc41e5251e4 100644
--- a/pandas/tests/io/parser/na_values.py
+++ b/pandas/tests/io/parser/na_values.py
@@ -107,12 +107,14 @@ def test_custom_na_values(self):
         df = self.read_csv(StringIO(data), na_values=['baz'], skiprows=[1])
         tm.assert_numpy_array_equal(df.values, expected)
 
-        df2 = self.read_table(StringIO(data), sep=',', na_values=['baz'],
-                              skiprows=[1])
+        with tm.assert_produces_warning(FutureWarning):
+            df2 = self.read_table(StringIO(data), sep=',', na_values=['baz'],
+                                  skiprows=[1])
         tm.assert_numpy_array_equal(df2.values, expected)
 
-        df3 = self.read_table(StringIO(data), sep=',', na_values='baz',
-                              skiprows=[1])
+        with tm.assert_produces_warning(FutureWarning):
+            df3 = self.read_table(StringIO(data), sep=',', na_values='baz',
+                                  skiprows=[1])
         tm.assert_numpy_array_equal(df3.values, expected)
 
     def test_bool_na_values(self):
diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py
index c0616ebbab4a5d..09493c90038688 100644
--- a/pandas/tests/io/parser/python_parser_only.py
+++ b/pandas/tests/io/parser/python_parser_only.py
@@ -95,7 +95,8 @@ def test_BytesIO_input(self):
                 "Bytes-related test - only needs to work on Python 3")
 
         data = BytesIO("שלום::1234\n562::123".encode('cp1255'))
-        result = self.read_table(data, sep="::", encoding='cp1255')
+        with tm.assert_produces_warning(FutureWarning):
+            result = self.read_table(data, sep="::", encoding='cp1255')
         expected = DataFrame([[562, 123]], columns=["שלום", "1234"])
         tm.assert_frame_equal(result, expected)
 
@@ -169,7 +170,8 @@ def test_read_table_buglet_4x_multiindex(self):
 a   q   20      4    0.4473  1.4152  0.2834  1.00661  0.1744
 x   q   30      3   -0.6662 -0.5243 -0.3580  0.89145  2.5838"""
 
-        df = self.read_table(StringIO(text), sep=r'\s+')
+        with tm.assert_produces_warning(FutureWarning):
+            df = self.read_table(StringIO(text), sep=r'\s+')
         assert df.index.names == ('one', 'two', 'three', 'four')
 
         # see gh-6893
@@ -177,7 +179,8 @@ def test_read_table_buglet_4x_multiindex(self):
         expected = DataFrame.from_records(
             [(1, 3, 7, 0, 3, 6), (3, 1, 4, 1, 5, 9)],
             columns=list('abcABC'), index=list('abc'))
-        actual = self.read_table(StringIO(data), sep=r'\s+')
+        with tm.assert_produces_warning(FutureWarning):
+            actual = self.read_table(StringIO(data), sep=r'\s+')
         tm.assert_frame_equal(actual, expected)
 
     def test_skipfooter_with_decimal(self):
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index e2243b8087a5b3..d441d2f19a0cca 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -44,7 +44,8 @@ def check_compressed_urls(salaries_table, compression, extension, mode,
     if mode != 'explicit':
         compression = mode
 
-    url_table = read_table(url, compression=compression, engine=engine)
+    with tm.assert_produces_warning(FutureWarning):
+        url_table = read_table(url, compression=compression, engine=engine)
     tm.assert_frame_equal(url_table, salaries_table)
 
 
diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py
index 3117f6fae55da0..f9ebc2cd56690b 100644
--- a/pandas/tests/io/parser/test_unsupported.py
+++ b/pandas/tests/io/parser/test_unsupported.py
@@ -9,6 +9,8 @@
 test suite as new feature support is added to the parsers.
 """
 
+import warnings
+
 import pandas.io.parsers as parsers
 import pandas.util.testing as tm
 
@@ -43,23 +45,43 @@ def test_c_engine(self):
 
         # specify C engine with unsupported options (raise)
         with tm.assert_raises_regex(ValueError, msg):
-            read_table(StringIO(data), engine='c',
-                       sep=None, delim_whitespace=False)
+            with tm.assert_produces_warning(FutureWarning):
+                read_table(StringIO(data), engine='c',
+                           sep=None, delim_whitespace=False)
         with tm.assert_raises_regex(ValueError, msg):
-            read_table(StringIO(data), engine='c', sep=r'\s')
+            with tm.assert_produces_warning(FutureWarning):
+                read_table(StringIO(data), engine='c', sep=r'\s')
         with tm.assert_raises_regex(ValueError, msg):
-            read_table(StringIO(data), engine='c', quotechar=chr(128))
+            with tm.assert_produces_warning(FutureWarning):
+                read_table(StringIO(data), engine='c', quotechar=chr(128))
         with tm.assert_raises_regex(ValueError, msg):
-            read_table(StringIO(data), engine='c', skipfooter=1)
+            with tm.assert_produces_warning(FutureWarning):
+                read_table(StringIO(data), engine='c', skipfooter=1)
 
         # specify C-unsupported options without python-unsupported options
         with tm.assert_produces_warning(parsers.ParserWarning):
+            warnings.simplefilter("ignore", category=FutureWarning)
+            read_table(StringIO(data), sep=None, delim_whitespace=False)
+        with tm.assert_produces_warning(FutureWarning):
+            warnings.simplefilter("ignore", category=parsers.ParserWarning)
             read_table(StringIO(data), sep=None, delim_whitespace=False)
         with tm.assert_produces_warning(parsers.ParserWarning):
+            warnings.simplefilter("ignore", category=FutureWarning)
+            read_table(StringIO(data), quotechar=chr(128))
+        with tm.assert_produces_warning(FutureWarning):
+            warnings.simplefilter("ignore", category=parsers.ParserWarning)
             read_table(StringIO(data), quotechar=chr(128))
         with tm.assert_produces_warning(parsers.ParserWarning):
+            warnings.simplefilter("ignore", category=FutureWarning)
+            read_table(StringIO(data), sep=r'\s')
+        with tm.assert_produces_warning(FutureWarning):
+            warnings.simplefilter("ignore", category=parsers.ParserWarning)
             read_table(StringIO(data), sep=r'\s')
         with tm.assert_produces_warning(parsers.ParserWarning):
+            warnings.simplefilter("ignore", category=FutureWarning)
+            read_table(StringIO(data), skipfooter=1)
+        with tm.assert_produces_warning(FutureWarning):
+            warnings.simplefilter("ignore", category=parsers.ParserWarning)
             read_table(StringIO(data), skipfooter=1)
 
         text = """                      A       B       C       D        E
@@ -70,9 +92,11 @@ def test_c_engine(self):
         msg = 'Error tokenizing data'
 
         with tm.assert_raises_regex(ParserError, msg):
-            read_table(StringIO(text), sep='\\s+')
+            with tm.assert_produces_warning(FutureWarning):
+                read_table(StringIO(text), sep='\\s+')
         with tm.assert_raises_regex(ParserError, msg):
-            read_table(StringIO(text), engine='c', sep='\\s+')
+            with tm.assert_produces_warning(FutureWarning):
+                read_table(StringIO(text), engine='c', sep='\\s+')
 
         msg = "Only length-1 thousands markers supported"
         data = """A|B|C
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 3caee2b44c5798..d3639e3fcdfbe3 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -519,7 +519,8 @@ def test_xs_level_multiple(self):
 a   q   20      4    0.4473  1.4152  0.2834  1.00661  0.1744
 x   q   30      3   -0.6662 -0.5243 -0.3580  0.89145  2.5838"""
 
-        df = read_table(StringIO(text), sep=r'\s+', engine='python')
+        with tm.assert_produces_warning(FutureWarning):
+            df = read_table(StringIO(text), sep=r'\s+', engine='python')
 
         result = df.xs(('a', 4), level=['one', 'four'])
         expected = df.xs('a').xs(4, level='four')
@@ -554,7 +555,8 @@ def test_xs_level0(self):
 a   q   20      4    0.4473  1.4152  0.2834  1.00661  0.1744
 x   q   30      3   -0.6662 -0.5243 -0.3580  0.89145  2.5838"""
 
-        df = read_table(StringIO(text), sep=r'\s+', engine='python')
+        with tm.assert_produces_warning(FutureWarning):
+            df = read_table(StringIO(text), sep=r'\s+', engine='python')
 
         result = df.xs('a', level=0)
         expected = df.xs('a')
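
For review convenience, a minimal sketch of the user-facing behaviour this patch introduces. It assumes pandas is built from this branch (the warning text follows the strings added to pandas/io/parsers.py above), and the variable names are purely illustrative::

    import warnings
    from io import StringIO

    import pandas as pd

    data = "a\tb\n1\t2\n3\t4"

    # Deprecated spelling: still parses, but now emits a FutureWarning.
    # When no explicit sep is passed, the warning suggests read_csv with sep='\t'.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        old = pd.read_table(StringIO(data))
    assert any(issubclass(w.category, FutureWarning) for w in caught)

    # Recommended replacement: read_csv with an explicit tab separator.
    new = pd.read_csv(StringIO(data), sep="\t")

    pd.testing.assert_frame_equal(old, new)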