Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: Deprecate skip_footer in read_csv #13386

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ skiprows : list-like or integer, default ``None``
of the file.
skipfooter : int, default ``0``
Number of lines at bottom of file to skip (unsupported with engine='c').
skip_footer : int, default ``0``
DEPRECATED: use the ``skipfooter`` parameter instead, as they are identical
nrows : int, default ``None``
Number of rows of file to read. Useful for reading pieces of large files.
low_memory : boolean, default ``True``
Expand Down Expand Up @@ -1411,7 +1413,7 @@ back to python if C-unsupported options are specified. Currently, C-unsupported
options include:

- ``sep`` other than a single character (e.g. regex separators)
- ``skip_footer``
- ``skipfooter``
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did the docstring get changed (to add DEPRECATED)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm...not sure where those changes went. Added them back.

- ``sep=None`` with ``delim_whitespace=False``

Specifying any of the above options will produce a ``ParserWarning`` unless the
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,7 @@ Deprecations
- ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13320`)
- ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`)
- ``as_recarray`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13373`)
- ``skip_footer`` has been deprecated in ``pd.read_csv()`` in favor of ``skipfooter`` and will be removed in a future version (:issue:`13349`)
- top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`)
- ``Timestamp.offset`` property (and named arg in the constructor), has been deprecated in favor of ``freq`` (:issue:`12160`)
- ``pd.tseries.util.pivot_annual`` is deprecated. Use ``pivot_table`` as alternative, an example is :ref:`here <cookbook.pivot>` (:issue:`736`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def _parse_cell(cell_contents, cell_typ):
parse_dates=parse_dates,
date_parser=date_parser,
skiprows=skiprows,
skip_footer=skip_footer,
skipfooter=skip_footer,
squeeze=squeeze,
**kwds)

Expand Down
43 changes: 24 additions & 19 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@
at the start of the file
skipfooter : int, default 0
Number of lines at bottom of file to skip (Unsupported with engine='c')
skip_footer : int, default 0
DEPRECATED: use the `skipfooter` parameter instead, as they are identical
nrows : int, default None
Number of rows of file to read. Useful for reading pieces of large files
na_values : str or list-like or dict, default None
Expand Down Expand Up @@ -341,9 +343,6 @@ def _validate_nrows(nrows):
def _read(filepath_or_buffer, kwds):
"Generic reader of line files."
encoding = kwds.get('encoding', None)
skipfooter = kwds.pop('skipfooter', None)
if skipfooter is not None:
kwds['skip_footer'] = skipfooter

# If the input could be a filename, check for a recognizable compression
# extension. If we're reading from a URL, the `get_filepath_or_buffer`
Expand Down Expand Up @@ -411,8 +410,8 @@ def _read(filepath_or_buffer, kwds):
'na_values': None,
'true_values': None,
'false_values': None,
'skip_footer': 0,
'converters': None,
'skipfooter': 0,

'keep_default_na': True,
'thousands': None,
Expand Down Expand Up @@ -461,7 +460,7 @@ def _read(filepath_or_buffer, kwds):
'widths': None,
}

_c_unsupported = set(['skip_footer'])
_c_unsupported = set(['skipfooter'])
_python_unsupported = set([
'low_memory',
'buffer_lines',
Expand Down Expand Up @@ -503,7 +502,6 @@ def parser_f(filepath_or_buffer,
false_values=None,
skipinitialspace=False,
skiprows=None,
skipfooter=None,
nrows=None,

# NA and Missing Data Handling
Expand Down Expand Up @@ -541,8 +539,8 @@ def parser_f(filepath_or_buffer,
error_bad_lines=True,
warn_bad_lines=True,

# Deprecated
skip_footer=0,
skipfooter=0,
skip_footer=0, # deprecated

# Internal
doublequote=True,
Expand Down Expand Up @@ -570,6 +568,13 @@ def parser_f(filepath_or_buffer,
engine = 'c'
engine_specified = False

if skip_footer != 0:
warnings.warn("The 'skip_footer' argument has "
"been deprecated and will be removed "
"in a future version. Please use the "
"'skipfooter' argument instead.",
FutureWarning, stacklevel=2)

kwds = dict(delimiter=delimiter,
engine=engine,
dialect=dialect,
Expand Down Expand Up @@ -767,9 +772,9 @@ def _clean_options(self, options, engine):

# C engine not supported yet
if engine == 'c':
if options['skip_footer'] > 0:
if options['skipfooter'] > 0:
fallback_reason = "the 'c' engine does not support"\
" skip_footer"
" skipfooter"
engine = 'python'

if sep is None and not delim_whitespace:
Expand Down Expand Up @@ -902,8 +907,8 @@ def _failover_to_python(self):

def read(self, nrows=None):
if nrows is not None:
if self.options.get('skip_footer'):
raise ValueError('skip_footer not supported for iteration')
if self.options.get('skipfooter'):
raise ValueError('skipfooter not supported for iteration')

ret = self._engine.read(nrows)

Expand Down Expand Up @@ -1578,7 +1583,7 @@ def TextParser(*args, **kwds):
date_parser : function, default None
skiprows : list of integers
Row numbers to skip
skip_footer : int
skipfooter : int
Number of lines at bottom of file to skip
converters : dict, default None
Dict of functions for converting values in certain columns. Keys can
Expand Down Expand Up @@ -1691,7 +1696,7 @@ def __init__(self, f, **kwds):
self.memory_map = kwds['memory_map']
self.skiprows = kwds['skiprows']

self.skip_footer = kwds['skip_footer']
self.skipfooter = kwds['skipfooter']
self.delimiter = kwds['delimiter']

self.quotechar = kwds['quotechar']
Expand Down Expand Up @@ -2323,7 +2328,7 @@ def _rows_to_cols(self, content):
content, min_width=col_len).T)
zip_len = len(zipped_content)

if self.skip_footer < 0:
if self.skipfooter < 0:
raise ValueError('skip footer cannot be negative')

# Loop through rows to verify lengths are correct.
Expand All @@ -2336,8 +2341,8 @@ def _rows_to_cols(self, content):
break

footers = 0
if self.skip_footer:
footers = self.skip_footer
if self.skipfooter:
footers = self.skipfooter

row_num = self.pos - (len(content) - i + footers)

Expand Down Expand Up @@ -2423,8 +2428,8 @@ def _get_lines(self, rows=None):
else:
lines = new_rows

if self.skip_footer:
lines = lines[:-self.skip_footer]
if self.skipfooter:
lines = lines[:-self.skipfooter]

lines = self._check_comments(lines)
if self.skip_blank_lines:
Expand Down
12 changes: 6 additions & 6 deletions pandas/io/tests/parser/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,9 @@ def test_malformed(self):
skiprows=[2])
it.read()

# skip_footer is not supported with the C parser yet
# skipfooter is not supported with the C parser yet
if self.engine == 'python':
# skip_footer
# skipfooter
data = """ignore
A,B,C
1,2,3 # comment
Expand All @@ -232,7 +232,7 @@ def test_malformed(self):
with tm.assertRaisesRegexp(Exception, msg):
self.read_table(StringIO(data), sep=',',
header=1, comment='#',
skip_footer=1)
skipfooter=1)

def test_quoting(self):
bad_line_small = """printer\tresult\tvariant_name
Expand Down Expand Up @@ -524,11 +524,11 @@ def test_iterator(self):
self.assertEqual(len(result), 3)
tm.assert_frame_equal(pd.concat(result), expected)

# skip_footer is not supported with the C parser yet
# skipfooter is not supported with the C parser yet
if self.engine == 'python':
# test bad parameter (skip_footer)
# test bad parameter (skipfooter)
reader = self.read_csv(StringIO(self.data1), index_col=0,
iterator=True, skip_footer=True)
iterator=True, skipfooter=True)
self.assertRaises(ValueError, reader.read, 3)

def test_pass_names_with_index(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/tests/parser/python_parser_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_single_line(self):
finally:
sys.stdout = sys.__stdout__

def test_skip_footer(self):
def test_skipfooter(self):
# see gh-6607
data = """A,B,C
1,2,3
Expand All @@ -107,7 +107,7 @@ def test_skip_footer(self):
want to skip this
also also skip this
"""
result = self.read_csv(StringIO(data), skip_footer=2)
result = self.read_csv(StringIO(data), skipfooter=2)
no_footer = '\n'.join(data.split('\n')[:-3])
expected = self.read_csv(StringIO(no_footer))
tm.assert_frame_equal(result, expected)
Expand Down
13 changes: 9 additions & 4 deletions pandas/io/tests/parser/test_unsupported.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_c_engine(self):
with tm.assertRaisesRegexp(ValueError, msg):
read_table(StringIO(data), sep='\s', dtype={'a': float})
with tm.assertRaisesRegexp(ValueError, msg):
read_table(StringIO(data), skip_footer=1, dtype={'a': float})
read_table(StringIO(data), skipfooter=1, dtype={'a': float})

# specify C engine with unsupported options (raise)
with tm.assertRaisesRegexp(ValueError, msg):
Expand All @@ -61,15 +61,15 @@ def test_c_engine(self):
with tm.assertRaisesRegexp(ValueError, msg):
read_table(StringIO(data), engine='c', sep='\s')
with tm.assertRaisesRegexp(ValueError, msg):
read_table(StringIO(data), engine='c', skip_footer=1)
read_table(StringIO(data), engine='c', skipfooter=1)

# specify C-unsupported options without python-unsupported options
with tm.assert_produces_warning(parsers.ParserWarning):
read_table(StringIO(data), sep=None, delim_whitespace=False)
with tm.assert_produces_warning(parsers.ParserWarning):
read_table(StringIO(data), sep='\s')
with tm.assert_produces_warning(parsers.ParserWarning):
read_table(StringIO(data), skip_footer=1)
read_table(StringIO(data), skipfooter=1)

text = """ A B C D E
one two three four
Expand Down Expand Up @@ -127,15 +127,20 @@ def test_deprecated_args(self):
'as_recarray': True,
'buffer_lines': True,
'compact_ints': True,
'skip_footer': True,
'use_unsigned': True,
}

engines = 'c', 'python'

for engine in engines:
for arg, non_default_val in deprecated.items():
if engine == 'c' and arg == 'skip_footer':
# unsupported --> exception is raised
continue

if engine == 'python' and arg == 'buffer_lines':
# unsupported --> exception is raised first
# unsupported --> exception is raised
continue

with tm.assert_produces_warning(
Expand Down
16 changes: 8 additions & 8 deletions pandas/parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ cdef extern from "parser/tokenizer.h":

void *skipset
int64_t skip_first_N_rows
int skip_footer
int skipfooter
double (*converter)(const char *, char **, char, char, char, int) nogil

# error handling
Expand Down Expand Up @@ -270,7 +270,7 @@ cdef class TextReader:
kh_str_t *true_set

cdef public:
int leading_cols, table_width, skip_footer, buffer_lines
int leading_cols, table_width, skipfooter, buffer_lines
object allow_leading_cols
object delimiter, converters, delim_whitespace
object na_values
Expand Down Expand Up @@ -338,7 +338,7 @@ cdef class TextReader:
low_memory=False,
buffer_lines=None,
skiprows=None,
skip_footer=0,
skipfooter=0,
verbose=False,
mangle_dupe_cols=True,
tupleize_cols=False,
Expand Down Expand Up @@ -418,15 +418,15 @@ cdef class TextReader:
if skiprows is not None:
self._make_skiprow_set()

self.skip_footer = skip_footer
self.skipfooter = skipfooter

# suboptimal
if usecols is not None:
self.has_usecols = 1
self.usecols = set(usecols)

# XXX
if skip_footer > 0:
if skipfooter > 0:
self.parser.error_bad_lines = 0
self.parser.warn_bad_lines = 0

Expand Down Expand Up @@ -912,8 +912,8 @@ cdef class TextReader:
if buffered_lines < irows:
self._tokenize_rows(irows - buffered_lines)

if self.skip_footer > 0:
raise ValueError('skip_footer can only be used to read '
if self.skipfooter > 0:
raise ValueError('skipfooter can only be used to read '
'the whole file')
else:
with nogil:
Expand All @@ -926,7 +926,7 @@ cdef class TextReader:

if status < 0:
raise_parser_error('Error tokenizing data', self.parser)
footer = self.skip_footer
footer = self.skipfooter

if self.parser_start == self.parser.lines:
raise StopIteration
Expand Down