From aa88215a3f6390096a022bdc9401e7d215677006 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 28 Jul 2016 20:21:55 -0400 Subject: [PATCH] API: Deprecate skip_footer in read_csv Title is self-explanatory. Closes gh-13349 and partially undoes this commit back in `v0.9.0`. With such a massive API now, having duplicate arguments makes managing it way less practical. Author: gfyoung Closes #13386 from gfyoung/deprecate-dup-skipfooter and squashes the following commits: d21345f [gfyoung] API: Deprecate skip_footer in read_csv --- doc/source/io.rst | 4 +- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/io/excel.py | 2 +- pandas/io/parsers.py | 43 +++++++++++--------- pandas/io/tests/parser/common.py | 12 +++--- pandas/io/tests/parser/python_parser_only.py | 4 +- pandas/io/tests/parser/test_unsupported.py | 13 ++++-- pandas/parser.pyx | 16 ++++---- 8 files changed, 54 insertions(+), 41 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index e3b03b5a39b37..ee5734aaf9494 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -175,6 +175,8 @@ skiprows : list-like or integer, default ``None`` of the file. skipfooter : int, default ``0`` Number of lines at bottom of file to skip (unsupported with engine='c'). +skip_footer : int, default ``0`` + DEPRECATED: use the ``skipfooter`` parameter instead, as they are identical nrows : int, default ``None`` Number of rows of file to read. Useful for reading pieces of large files. low_memory : boolean, default ``True`` @@ -1411,7 +1413,7 @@ back to python if C-unsupported options are specified. Currently, C-unsupported options include: - ``sep`` other than a single character (e.g. 
regex separators) -- ``skip_footer`` +- ``skipfooter`` - ``sep=None`` with ``delim_whitespace=False`` Specifying any of the above options will produce a ``ParserWarning`` unless the diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 392d58b3ef98a..03f8dbc20b52e 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -671,6 +671,7 @@ Deprecations - ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13320`) - ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`) - ``as_recarray`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13373`) +- ``skip_footer`` has been deprecated in ``pd.read_csv()`` in favor of ``skipfooter`` and will be removed in a future version (:issue:`13349`) - top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`) - ``Timestamp.offset`` property (and named arg in the constructor), has been deprecated in favor of ``freq`` (:issue:`12160`) - ``pd.tseries.util.pivot_annual`` is deprecated. 
Use ``pivot_table`` as alternative, an example is :ref:`here <cookbook.pivot>` (:issue:`736`) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 703cdbeaa7a8f..b415661c99438 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -473,7 +473,7 @@ def _parse_cell(cell_contents, cell_typ): parse_dates=parse_dates, date_parser=date_parser, skiprows=skiprows, - skip_footer=skip_footer, + skipfooter=skip_footer, squeeze=squeeze, **kwds) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 353bddbed3566..abbe7bdf18461 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -125,6 +125,8 @@ at the start of the file skipfooter : int, default 0 Number of lines at bottom of file to skip (Unsupported with engine='c') +skip_footer : int, default 0 + DEPRECATED: use the `skipfooter` parameter instead, as they are identical nrows : int, default None Number of rows of file to read. Useful for reading pieces of large files na_values : str or list-like or dict, default None @@ -341,9 +343,6 @@ def _validate_nrows(nrows): def _read(filepath_or_buffer, kwds): "Generic reader of line files." encoding = kwds.get('encoding', None) - skipfooter = kwds.pop('skipfooter', None) - if skipfooter is not None: - kwds['skip_footer'] = skipfooter # If the input could be a filename, check for a recognizable compression # extension. 
If we're reading from a URL, the `get_filepath_or_buffer` @@ -411,8 +410,8 @@ def _read(filepath_or_buffer, kwds): 'na_values': None, 'true_values': None, 'false_values': None, - 'skip_footer': 0, 'converters': None, + 'skipfooter': 0, 'keep_default_na': True, 'thousands': None, @@ -461,7 +460,7 @@ def _read(filepath_or_buffer, kwds): 'widths': None, } -_c_unsupported = set(['skip_footer']) +_c_unsupported = set(['skipfooter']) _python_unsupported = set([ 'low_memory', 'buffer_lines', @@ -503,7 +502,6 @@ def parser_f(filepath_or_buffer, false_values=None, skipinitialspace=False, skiprows=None, - skipfooter=None, nrows=None, # NA and Missing Data Handling @@ -541,8 +539,8 @@ def parser_f(filepath_or_buffer, error_bad_lines=True, warn_bad_lines=True, - # Deprecated - skip_footer=0, + skipfooter=0, + skip_footer=0, # deprecated # Internal doublequote=True, @@ -570,6 +568,13 @@ def parser_f(filepath_or_buffer, engine = 'c' engine_specified = False + if skip_footer != 0: + warnings.warn("The 'skip_footer' argument has " + "been deprecated and will be removed " + "in a future version. 
Please use the " + "'skipfooter' argument instead.", + FutureWarning, stacklevel=2) + kwds = dict(delimiter=delimiter, engine=engine, dialect=dialect, @@ -768,9 +773,9 @@ def _clean_options(self, options, engine): # C engine not supported yet if engine == 'c': - if options['skip_footer'] > 0: + if options['skipfooter'] > 0: fallback_reason = "the 'c' engine does not support"\ - " skip_footer" + " skipfooter" engine = 'python' if sep is None and not delim_whitespace: @@ -903,8 +908,8 @@ def _failover_to_python(self): def read(self, nrows=None): if nrows is not None: - if self.options.get('skip_footer'): - raise ValueError('skip_footer not supported for iteration') + if self.options.get('skipfooter'): + raise ValueError('skipfooter not supported for iteration') ret = self._engine.read(nrows) @@ -1591,7 +1596,7 @@ def TextParser(*args, **kwds): date_parser : function, default None skiprows : list of integers Row numbers to skip - skip_footer : int + skipfooter : int Number of line at bottom of file to skip converters : dict, default None Dict of functions for converting values in certain columns. Keys can @@ -1704,7 +1709,7 @@ def __init__(self, f, **kwds): self.memory_map = kwds['memory_map'] self.skiprows = kwds['skiprows'] - self.skip_footer = kwds['skip_footer'] + self.skipfooter = kwds['skipfooter'] self.delimiter = kwds['delimiter'] self.quotechar = kwds['quotechar'] @@ -2340,7 +2345,7 @@ def _rows_to_cols(self, content): content, min_width=col_len).T) zip_len = len(zipped_content) - if self.skip_footer < 0: + if self.skipfooter < 0: raise ValueError('skip footer cannot be negative') # Loop through rows to verify lengths are correct. 
@@ -2353,8 +2358,8 @@ def _rows_to_cols(self, content): break footers = 0 - if self.skip_footer: - footers = self.skip_footer + if self.skipfooter: + footers = self.skipfooter row_num = self.pos - (len(content) - i + footers) @@ -2440,8 +2445,8 @@ def _get_lines(self, rows=None): else: lines = new_rows - if self.skip_footer: - lines = lines[:-self.skip_footer] + if self.skipfooter: + lines = lines[:-self.skipfooter] lines = self._check_comments(lines) if self.skip_blank_lines: diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index f3adb0e39982c..7e9513c0bcff3 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -218,9 +218,9 @@ def test_malformed(self): skiprows=[2]) it.read() - # skip_footer is not supported with the C parser yet + # skipfooter is not supported with the C parser yet if self.engine == 'python': - # skip_footer + # skipfooter data = """ignore A,B,C 1,2,3 # comment @@ -232,7 +232,7 @@ def test_malformed(self): with tm.assertRaisesRegexp(Exception, msg): self.read_table(StringIO(data), sep=',', header=1, comment='#', - skip_footer=1) + skipfooter=1) def test_quoting(self): bad_line_small = """printer\tresult\tvariant_name @@ -536,11 +536,11 @@ def test_iterator(self): self.assertEqual(len(result), 3) tm.assert_frame_equal(pd.concat(result), expected) - # skip_footer is not supported with the C parser yet + # skipfooter is not supported with the C parser yet if self.engine == 'python': - # test bad parameter (skip_footer) + # test bad parameter (skipfooter) reader = self.read_csv(StringIO(self.data1), index_col=0, - iterator=True, skip_footer=True) + iterator=True, skipfooter=True) self.assertRaises(ValueError, reader.read, 3) def test_pass_names_with_index(self): diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py index ad81dbb9f6e0f..619b6b63568f3 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ 
b/pandas/io/tests/parser/python_parser_only.py @@ -98,7 +98,7 @@ def test_single_line(self): finally: sys.stdout = sys.__stdout__ - def test_skip_footer(self): + def test_skipfooter(self): # see gh-6607 data = """A,B,C 1,2,3 @@ -107,7 +107,7 @@ def test_skip_footer(self): want to skip this also also skip this """ - result = self.read_csv(StringIO(data), skip_footer=2) + result = self.read_csv(StringIO(data), skipfooter=2) no_footer = '\n'.join(data.split('\n')[:-3]) expected = self.read_csv(StringIO(no_footer)) tm.assert_frame_equal(result, expected) diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py index c8ad46af10795..ef8f7967193ff 100644 --- a/pandas/io/tests/parser/test_unsupported.py +++ b/pandas/io/tests/parser/test_unsupported.py @@ -52,7 +52,7 @@ def test_c_engine(self): with tm.assertRaisesRegexp(ValueError, msg): read_table(StringIO(data), sep='\s', dtype={'a': float}) with tm.assertRaisesRegexp(ValueError, msg): - read_table(StringIO(data), skip_footer=1, dtype={'a': float}) + read_table(StringIO(data), skipfooter=1, dtype={'a': float}) # specify C engine with unsupported options (raise) with tm.assertRaisesRegexp(ValueError, msg): @@ -61,7 +61,7 @@ def test_c_engine(self): with tm.assertRaisesRegexp(ValueError, msg): read_table(StringIO(data), engine='c', sep='\s') with tm.assertRaisesRegexp(ValueError, msg): - read_table(StringIO(data), engine='c', skip_footer=1) + read_table(StringIO(data), engine='c', skipfooter=1) # specify C-unsupported options without python-unsupported options with tm.assert_produces_warning(parsers.ParserWarning): @@ -69,7 +69,7 @@ def test_c_engine(self): with tm.assert_produces_warning(parsers.ParserWarning): read_table(StringIO(data), sep='\s') with tm.assert_produces_warning(parsers.ParserWarning): - read_table(StringIO(data), skip_footer=1) + read_table(StringIO(data), skipfooter=1) text = """ A B C D E one two three four @@ -127,6 +127,7 @@ def test_deprecated_args(self): 
'as_recarray': True, 'buffer_lines': True, 'compact_ints': True, + 'skip_footer': True, 'use_unsigned': True, } @@ -134,8 +135,12 @@ def test_deprecated_args(self): for engine in engines: for arg, non_default_val in deprecated.items(): + if engine == 'c' and arg == 'skip_footer': + # unsupported --> exception is raised + continue + if engine == 'python' and arg == 'buffer_lines': - # unsupported --> exception is raised first + # unsupported --> exception is raised continue with tm.assert_produces_warning( diff --git a/pandas/parser.pyx b/pandas/parser.pyx index b5d1c8b7acf2c..e72e2f90a5213 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -165,7 +165,7 @@ cdef extern from "parser/tokenizer.h": void *skipset int64_t skip_first_N_rows - int skip_footer + int skipfooter double (*converter)(const char *, char **, char, char, char, int) nogil # error handling @@ -270,7 +270,7 @@ cdef class TextReader: kh_str_t *true_set cdef public: - int leading_cols, table_width, skip_footer, buffer_lines + int leading_cols, table_width, skipfooter, buffer_lines object allow_leading_cols object delimiter, converters, delim_whitespace object na_values @@ -338,7 +338,7 @@ cdef class TextReader: low_memory=False, buffer_lines=None, skiprows=None, - skip_footer=0, + skipfooter=0, verbose=False, mangle_dupe_cols=True, tupleize_cols=False, @@ -418,7 +418,7 @@ cdef class TextReader: if skiprows is not None: self._make_skiprow_set() - self.skip_footer = skip_footer + self.skipfooter = skipfooter # suboptimal if usecols is not None: @@ -426,7 +426,7 @@ cdef class TextReader: self.usecols = set(usecols) # XXX - if skip_footer > 0: + if skipfooter > 0: self.parser.error_bad_lines = 0 self.parser.warn_bad_lines = 0 @@ -912,8 +912,8 @@ cdef class TextReader: if buffered_lines < irows: self._tokenize_rows(irows - buffered_lines) - if self.skip_footer > 0: - raise ValueError('skip_footer can only be used to read ' + if self.skipfooter > 0: + raise ValueError('skipfooter can only be used to 
read ' 'the whole file') else: with nogil: @@ -926,7 +926,7 @@ cdef class TextReader: if status < 0: raise_parser_error('Error tokenizing data', self.parser) - footer = self.skip_footer + footer = self.skipfooter if self.parser_start == self.parser.lines: raise StopIteration