Skip to content

Commit

Permalink
Add skip_footer to ExcelFile.parse and alias skipfooter/skip_footer
Browse files Browse the repository at this point in the history
  • Loading branch information
Chang She committed Sep 25, 2012
1 parent e03bfcd commit 9ceea2f
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 53 deletions.
130 changes: 77 additions & 53 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ def _is_url(url):
def _read(cls, filepath_or_buffer, kwds):
"Generic reader of line files."
encoding = kwds.get('encoding', None)
skipfooter = kwds.pop('skipfooter', None)
if skipfooter is not None:
kwds['skip_footer'] = skipfooter

if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
from urllib2 import urlopen
Expand Down Expand Up @@ -218,28 +221,31 @@ def read_csv(filepath_or_buffer,
verbose=False,
delimiter=None,
encoding=None,
squeeze=False):
kwds = dict(filepath_or_buffer=filepath_or_buffer,
sep=sep, dialect=dialect,
header=header, index_col=index_col,
names=names, skiprows=skiprows,
na_values=na_values, keep_default_na=keep_default_na,
thousands=thousands,
comment=comment, parse_dates=parse_dates,
keep_date_col=keep_date_col,
dayfirst=dayfirst, date_parser=date_parser,
nrows=nrows, iterator=iterator,
chunksize=chunksize, skip_footer=skip_footer,
converters=converters, verbose=verbose,
delimiter=delimiter, encoding=encoding,
squeeze=squeeze)
squeeze=False,
**kwds):
kdict = dict(filepath_or_buffer=filepath_or_buffer,
sep=sep, dialect=dialect,
header=header, index_col=index_col,
names=names, skiprows=skiprows,
na_values=na_values, keep_default_na=keep_default_na,
thousands=thousands,
comment=comment, parse_dates=parse_dates,
keep_date_col=keep_date_col,
dayfirst=dayfirst, date_parser=date_parser,
nrows=nrows, iterator=iterator,
chunksize=chunksize, skip_footer=skip_footer,
converters=converters, verbose=verbose,
delimiter=delimiter, encoding=encoding,
squeeze=squeeze)

kdict.update(kwds)

# Alias sep -> delimiter.
sep = kwds.pop('sep')
if kwds.get('delimiter', None) is None:
kwds['delimiter'] = sep
sep = kdict.pop('sep')
if kdict.get('delimiter', None) is None:
kdict['delimiter'] = sep

return _read(TextParser, filepath_or_buffer, kwds)
return _read(TextParser, filepath_or_buffer, kdict)

@Appender(_read_table_doc)
def read_table(filepath_or_buffer,
Expand All @@ -265,31 +271,34 @@ def read_table(filepath_or_buffer,
verbose=False,
delimiter=None,
encoding=None,
squeeze=False):
kwds = dict(filepath_or_buffer=filepath_or_buffer,
sep=sep, dialect=dialect,
header=header, index_col=index_col,
names=names, skiprows=skiprows,
na_values=na_values, keep_default_na=keep_default_na,
thousands=thousands,
comment=comment, parse_dates=parse_dates,
keep_date_col=keep_date_col,
dayfirst=dayfirst, date_parser=date_parser,
nrows=nrows, iterator=iterator,
chunksize=chunksize, skip_footer=skip_footer,
converters=converters, verbose=verbose,
delimiter=delimiter, encoding=encoding,
squeeze=squeeze)
squeeze=False,
**kwds):
kdict = dict(filepath_or_buffer=filepath_or_buffer,
sep=sep, dialect=dialect,
header=header, index_col=index_col,
names=names, skiprows=skiprows,
na_values=na_values, keep_default_na=keep_default_na,
thousands=thousands,
comment=comment, parse_dates=parse_dates,
keep_date_col=keep_date_col,
dayfirst=dayfirst, date_parser=date_parser,
nrows=nrows, iterator=iterator,
chunksize=chunksize, skip_footer=skip_footer,
converters=converters, verbose=verbose,
delimiter=delimiter, encoding=encoding,
squeeze=squeeze)

kdict.update(kwds)

# Alias sep -> delimiter.
sep = kwds.pop('sep')
if kwds.get('delimiter', None) is None:
kwds['delimiter'] = sep
sep = kdict.pop('sep')
if kdict.get('delimiter', None) is None:
kdict['delimiter'] = sep

# Override as default encoding.
kwds['encoding'] = None
kdict['encoding'] = None

return _read(TextParser, filepath_or_buffer, kwds)
return _read(TextParser, filepath_or_buffer, kdict)

@Appender(_read_fwf_doc)
def read_fwf(filepath_or_buffer,
Expand All @@ -315,8 +324,9 @@ def read_fwf(filepath_or_buffer,
delimiter=None,
verbose=False,
encoding=None,
squeeze=False):
kwds = dict(filepath_or_buffer=filepath_or_buffer,
squeeze=False,
**kwds):
kdict = dict(filepath_or_buffer=filepath_or_buffer,
colspecs=colspecs, widths=widths,
header=header, index_col=index_col,
names=names, skiprows=skiprows,
Expand All @@ -331,9 +341,11 @@ def read_fwf(filepath_or_buffer,
delimiter=delimiter, encoding=encoding,
squeeze=squeeze)

kdict.update(kwds)

# Check input arguments.
colspecs = kwds.get('colspecs', None)
widths = kwds.pop('widths', None)
colspecs = kdict.get('colspecs', None)
widths = kdict.pop('widths', None)
if bool(colspecs is None) == bool(widths is None):
raise ValueError("You must specify only one of 'widths' and "
"'colspecs'")
Expand All @@ -344,10 +356,10 @@ def read_fwf(filepath_or_buffer,
for w in widths:
colspecs.append( (col, col+w) )
col += w
kwds['colspecs'] = colspecs
kdict['colspecs'] = colspecs

kwds['thousands'] = thousands
return _read(FixedWidthFieldParser, filepath_or_buffer, kwds)
kdict['thousands'] = thousands
return _read(FixedWidthFieldParser, filepath_or_buffer, kdict)

def read_clipboard(**kwargs): # pragma: no cover
"""
Expand Down Expand Up @@ -1276,9 +1288,10 @@ def __init__(self, path_or_buf):
def __repr__(self):
    """Return the plain ``object`` representation for this instance."""
    # Delegate straight to object, bypassing any inherited custom repr.
    base = object.__repr__
    return base(self)

def parse(self, sheetname, header=0, skiprows=None, index_col=None,
parse_cols=None, parse_dates=False, date_parser=None,
na_values=None, thousands=None, chunksize=None):
def parse(self, sheetname, header=0, skiprows=None, skip_footer=0,
index_col=None, parse_cols=None, parse_dates=False,
date_parser=None, na_values=None, thousands=None, chunksize=None,
**kwds):
"""
Read Excel table into DataFrame
Expand All @@ -1289,7 +1302,9 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
header : int, default 0
Row to use for the column labels of the parsed DataFrame
skiprows : list-like
Row numbers to skip (0-indexed)
Rows to skip at the beginning (0-indexed)
skip_footer : int, default 0
Number of rows at the end of the sheet to skip
index_col : int, default None
Column to use as the row labels of the DataFrame. Pass None if
there is no such column
Expand All @@ -1304,6 +1319,10 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
-------
parsed : DataFrame
"""
skipfooter = kwds.pop('skipfooter', None)
if skipfooter is not None:
skip_footer = skipfooter

choose = {True:self._parse_xlsx,
False:self._parse_xls}
return choose[self.use_xlsx](sheetname, header=header,
Expand All @@ -1313,15 +1332,17 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
date_parser=date_parser,
na_values=na_values,
thousands=thousands,
chunksize=chunksize)
chunksize=chunksize,
skip_footer=skip_footer)

def _should_parse(self, i, parse_cols):
    """Return True when column index *i* is selected by *parse_cols*.

    An integer *parse_cols* is treated as an inclusive upper bound on
    the column index; any other value is treated as a collection of
    explicit column indices.
    """
    if not isinstance(parse_cols, int):
        return i in parse_cols
    return i <= parse_cols

def _parse_xlsx(self, sheetname, header=0, skiprows=None, index_col=None,
def _parse_xlsx(self, sheetname, header=0, skiprows=None,
skip_footer=0, index_col=None,
parse_cols=None, parse_dates=False, date_parser=None,
na_values=None, thousands=None, chunksize=None):
sheet = self.book.get_sheet_by_name(name=sheetname)
Expand Down Expand Up @@ -1350,11 +1371,13 @@ def _parse_xlsx(self, sheetname, header=0, skiprows=None, index_col=None,
parse_dates=parse_dates,
date_parser=date_parser,
skiprows=skiprows,
skip_footer=skip_footer,
chunksize=chunksize)

return parser.get_chunk()

def _parse_xls(self, sheetname, header=0, skiprows=None, index_col=None,
def _parse_xls(self, sheetname, header=0, skiprows=None,
skip_footer=0, index_col=None,
parse_cols=None, parse_dates=False, date_parser=None,
na_values=None, thousands=None, chunksize=None):
from datetime import MINYEAR, time, datetime
Expand Down Expand Up @@ -1394,6 +1417,7 @@ def _parse_xls(self, sheetname, header=0, skiprows=None, index_col=None,
parse_dates=parse_dates,
date_parser=date_parser,
skiprows=skiprows,
skip_footer=skip_footer,
chunksize=chunksize)

return parser.get_chunk()
Expand Down
22 changes: 22 additions & 0 deletions pandas/io/tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,13 @@ def test_excel_table(self):
assert_frame_equal(df, df2)
assert_frame_equal(df3, df2)

df4 = xls.parse('Sheet1', index_col=0, parse_dates=True,
skipfooter=1)
df5 = xls.parse('Sheet1', index_col=0, parse_dates=True,
skip_footer=1)
assert_frame_equal(df4, df.ix[:-1])
assert_frame_equal(df4, df5)

def test_excel_read_buffer(self):
_skip_if_no_xlrd()
_skip_if_no_openpyxl()
Expand All @@ -788,6 +795,13 @@ def test_xlsx_table(self):
assert_frame_equal(df, df2)
assert_frame_equal(df3, df2)

df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
skipfooter=1)
df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
skip_footer=1)
assert_frame_equal(df4, df.ix[:-1])
assert_frame_equal(df4, df5)

def test_parse_cols_int(self):
_skip_if_no_openpyxl()
_skip_if_no_xlrd()
Expand Down Expand Up @@ -1125,6 +1139,14 @@ def test_skip_footer(self):
result = read_csv(StringIO(data), nrows=3)
assert_frame_equal(result, expected)

# skipfooter alias
result = read_csv(StringIO(data), skipfooter=2)
no_footer = '\n'.join(data.split('\n')[:-3])
expected = read_csv(StringIO(no_footer))

assert_frame_equal(result, expected)


def test_no_unnamed_index(self):
data = """ id c0 c1 c2
0 1 0 a b
Expand Down

0 comments on commit 9ceea2f

Please sign in to comment.