diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 45385b70d8e1b9..cc8dc7b8a43498 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1065,38 +1065,6 @@ def _evaluate_usecols(usecols, names): return usecols -def _validate_usecols(usecols, names): - """ - Validates that all usecols are present in a given - list of names. If not, raise a ValueError that - shows what usecols are missing. - - Parameters - ---------- - usecols : iterable of usecols - The columns to validate are present in names. - names : iterable of names - The column names to check against. - - Returns - ------- - usecols : iterable of usecols - The `usecols` parameter if the validation succeeds. - - Raises - ------ - ValueError : Columns were missing. Error message will list them. - """ - missing = [c for c in usecols if c not in names] - if len(missing) > 0: - raise ValueError( - "Usecols do not match columns, " - "columns expected but not found: {missing}".format(missing=missing) - ) - - return usecols - - def _validate_skipfooter_arg(skipfooter): """ Validate the 'skipfooter' parameter. @@ -1128,7 +1096,7 @@ def _validate_skipfooter_arg(skipfooter): return skipfooter -def _validate_usecols_arg(usecols): +def _validate_usecols_arg(usecols, names=None): """ Validate the 'usecols' parameter. @@ -1136,16 +1104,23 @@ def _validate_usecols_arg(usecols): (column selection by index), strings (column by name) or is a callable. Raises a ValueError if that is not the case. + If 'names' is passed, validates that all usecols are present + in a given list of names. If not, raise a ValueError that + shows what usecols are missing. + Parameters ---------- usecols : array-like, callable, or None List of columns to use when parsing or a callable that can be used to filter a list of table columns. + names: iterable, default None + Iterable of names to check usecols against. + Returns ------- usecols_tuple : tuple - A tuple of (verified_usecols, usecols_dtype). 
+ If names is None, a tuple of (verified_usecols, usecols_dtype). 'verified_usecols' is either a set if an array-like is passed in or 'usecols' if a callable or None is passed in. @@ -1156,16 +1131,24 @@ def _validate_usecols_arg(usecols): msg = ("'usecols' must either be all strings, all unicode, " "all integers or a callable") - if usecols is not None: - if callable(usecols): - return usecols, None - usecols_dtype = lib.infer_dtype(usecols) - if usecols_dtype not in ('empty', 'integer', - 'string', 'unicode'): - raise ValueError(msg) + if names is None: + if usecols is not None: + if callable(usecols): + return usecols, None + usecols_dtype = lib.infer_dtype(usecols) + if usecols_dtype not in ('empty', 'integer', + 'string', 'unicode'): + raise ValueError(msg) - return set(usecols), usecols_dtype - return usecols, None + return set(usecols), usecols_dtype + return usecols, None + else: + missing = [c for c in usecols if c not in names] + if len(missing) > 0: + raise ValueError( + "Usecols do not match columns, columns expected " + "but not found: {missing}".format(missing=missing) + ) def _validate_parse_dates_arg(parse_dates): @@ -1694,14 +1677,14 @@ def __init__(self, src, **kwds): # GH 14671 if (self.usecols_dtype == 'string' and not set(usecols).issubset(self.orig_names)): - _validate_usecols(usecols, self.orig_names) + _validate_usecols_arg(usecols, self.orig_names) if len(self.names) > len(usecols): self.names = [n for i, n in enumerate(self.names) if (i in usecols or n in usecols)] if len(self.names) < len(usecols): - _validate_usecols(usecols, self.names) + _validate_usecols_arg(usecols, self.names) self._set_noconvert_columns() @@ -2480,7 +2463,7 @@ def _handle_usecols(self, columns, usecols_key): try: col_indices.append(usecols_key.index(col)) except ValueError: - _validate_usecols(self.usecols, usecols_key) + _validate_usecols_arg(self.usecols, usecols_key) else: col_indices.append(col) else: