Skip to content

Commit

Permalink
Refactoring into _validate_usecols_arg as suggested
Browse files Browse the repository at this point in the history
  • Loading branch information
AaronCritchley committed Nov 8, 2017
1 parent ba93833 commit cce97ad
Showing 1 changed file with 29 additions and 46 deletions.
75 changes: 29 additions & 46 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,38 +1065,6 @@ def _evaluate_usecols(usecols, names):
return usecols


def _validate_usecols(usecols, names):
"""
Validates that all usecols are present in a given
list of names. If not, raise a ValueError that
shows what usecols are missing.
Parameters
----------
usecols : iterable of usecols
The columns to validate are present in names.
names : iterable of names
The column names to check against.
Returns
-------
usecols : iterable of usecols
The `usecols` parameter if the validation succeeds.
Raises
------
ValueError : Columns were missing. Error message will list them.
"""
missing = [c for c in usecols if c not in names]
if len(missing) > 0:
raise ValueError(
"Usecols do not match columns, "
"columns expected but not found: {missing}".format(missing=missing)
)

return usecols


def _validate_skipfooter_arg(skipfooter):
"""
Validate the 'skipfooter' parameter.
Expand Down Expand Up @@ -1128,24 +1096,31 @@ def _validate_skipfooter_arg(skipfooter):
return skipfooter


def _validate_usecols_arg(usecols):
def _validate_usecols_arg(usecols, names=None):
"""
Validate the 'usecols' parameter.
Checks whether or not the 'usecols' parameter contains all integers
(column selection by index), strings (column by name) or is a callable.
Raises a ValueError if that is not the case.
If 'names' is passed, validates that all usecols are present
in a given list of names. If not, raise a ValueError that
shows what usecols are missing.
Parameters
----------
usecols : array-like, callable, or None
List of columns to use when parsing or a callable that can be used
to filter a list of table columns.
names: iterable, default None
Iterable of names to check usecols against.
Returns
-------
usecols_tuple : tuple
A tuple of (verified_usecols, usecols_dtype).
If names is not None, a tuple of (verified_usecols, usecols_dtype).
'verified_usecols' is either a set if an array-like is passed in or
'usecols' if a callable or None is passed in.
Expand All @@ -1156,16 +1131,24 @@ def _validate_usecols_arg(usecols):
msg = ("'usecols' must either be all strings, all unicode, "
"all integers or a callable")

if usecols is not None:
if callable(usecols):
return usecols, None
usecols_dtype = lib.infer_dtype(usecols)
if usecols_dtype not in ('empty', 'integer',
'string', 'unicode'):
raise ValueError(msg)
if names is None:
if usecols is not None:
if callable(usecols):
return usecols, None
usecols_dtype = lib.infer_dtype(usecols)
if usecols_dtype not in ('empty', 'integer',
'string', 'unicode'):
raise ValueError(msg)

return set(usecols), usecols_dtype
return usecols, None
return set(usecols), usecols_dtype
return usecols, None
else:
missing = [c for c in usecols if c not in names]
if len(missing) > 0:
raise ValueError(
"Usecols do not match columns, columns expected "
"but not found: {missing}".format(missing=missing)
)


def _validate_parse_dates_arg(parse_dates):
Expand Down Expand Up @@ -1694,14 +1677,14 @@ def __init__(self, src, **kwds):
# GH 14671
if (self.usecols_dtype == 'string' and
not set(usecols).issubset(self.orig_names)):
_validate_usecols(usecols, self.orig_names)
_validate_usecols_arg(usecols, self.orig_names)

if len(self.names) > len(usecols):
self.names = [n for i, n in enumerate(self.names)
if (i in usecols or n in usecols)]

if len(self.names) < len(usecols):
_validate_usecols(usecols, self.names)
_validate_usecols_arg(usecols, self.names)

self._set_noconvert_columns()

Expand Down Expand Up @@ -2480,7 +2463,7 @@ def _handle_usecols(self, columns, usecols_key):
try:
col_indices.append(usecols_key.index(col))
except ValueError:
_validate_usecols(self.usecols, usecols_key)
_validate_usecols_arg(self.usecols, usecols_key)
else:
col_indices.append(col)
else:
Expand Down

0 comments on commit cce97ad

Please sign in to comment.