pandas-dev · jreback · Dec 3, 2017 · Aug 22, 2017 · Oct 4, 2017 · Oct 4, 2017
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1065,6 +1065,38 @@ def _evaluate_usecols(usecols, names):
     return usecols
 
 
+def _validate_usecols(usecols, names):
+    """
+    Check that every entry of `usecols` is present in `names`.
+
+    Parameters
+    ----------
+    usecols : iterable of usecols
+        The columns whose presence in `names` is checked.
+    names : iterable of names
+        The column names to check against.
+
+    Returns
+    -------
+    usecols : iterable of usecols
+        The `usecols` argument, unchanged, if nothing is missing.
+
+    Raises
+    ------
+    ValueError
+        If any entry of `usecols` is not in `names`; the message
+        lists the missing entries in `usecols` iteration order.
+    """
+    missing = [c for c in usecols if c not in names]
+    if len(missing) > 0:
+        raise ValueError(
+            "Usecols do not match columns, "
+            "columns expected but not found: {missing}".format(missing=missing)
+        )
+
+    return usecols
+
+
 def _validate_skipfooter_arg(skipfooter):
     """
     Validate the 'skipfooter' parameter.
@@ -1662,14 +1694,14 @@ def __init__(self, src, **kwds):
             # GH 14671
             if (self.usecols_dtype == 'string' and
                     not set(usecols).issubset(self.orig_names)):
-                raise ValueError("Usecols do not match names.")
+                _validate_usecols(usecols, self.orig_names)
 
             if len(self.names) > len(usecols):
                 self.names = [n for i, n in enumerate(self.names)
                               if (i in usecols or n in usecols)]
 
             if len(self.names) < len(usecols):
-                raise ValueError("Usecols do not match names.")
+                _validate_usecols(usecols, self.names)
 
         self._set_noconvert_columns()
 
@@ -2442,9 +2474,13 @@ def _handle_usecols(self, columns, usecols_key):
                     raise ValueError("If using multiple headers, usecols must "
                                      "be integers.")
                 col_indices = []
+
                 for col in self.usecols:
                     if isinstance(col, string_types):
-                        col_indices.append(usecols_key.index(col))
+                        try:
+                            col_indices.append(usecols_key.index(col))
+                        except ValueError:
+                            _validate_usecols(self.usecols, usecols_key)
                     else:
                         col_indices.append(col)
             else:

diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py
@@ -480,10 +480,10 @@ def test_raise_on_usecols_names_mismatch(self):
         # GH 14671
         data = 'a,b,c,d\n1,2,3,4\n5,6,7,8'
 
-        if self.engine == 'c':
-            msg = 'Usecols do not match names'
-        else:
-            msg = 'is not in list'
+        msg = (
+            "Usecols do not match columns, "
+            "columns expected but not found: {missing}"
+        )
 
         usecols = ['a', 'b', 'c', 'd']
         df = self.read_csv(StringIO(data), usecols=usecols)
@@ -492,11 +492,11 @@ def test_raise_on_usecols_names_mismatch(self):
         tm.assert_frame_equal(df, expected)
 
         usecols = ['a', 'b', 'c', 'f']
-        with tm.assert_raises_regex(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg.format(missing="\['f'\]")):
             self.read_csv(StringIO(data), usecols=usecols)
 
         usecols = ['a', 'b', 'f']
-        with tm.assert_raises_regex(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg.format(missing="\['f'\]")):
             self.read_csv(StringIO(data), usecols=usecols)
 
         names = ['A', 'B', 'C', 'D']
@@ -520,9 +520,9 @@ def test_raise_on_usecols_names_mismatch(self):
         # tm.assert_frame_equal(df, expected)
 
         usecols = ['A', 'B', 'C', 'f']
-        with tm.assert_raises_regex(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg.format(missing="\['f'\]")):
             self.read_csv(StringIO(data), header=0, names=names,
                           usecols=usecols)
         usecols = ['A', 'B', 'f']
-        with tm.assert_raises_regex(ValueError, msg):
+        with tm.assert_raises_regex(ValueError, msg.format(missing="\['f'\]")):
             self.read_csv(StringIO(data), names=names, usecols=usecols)