Skip to content

Commit

Permalink
Check for usecols mismatch with names
Browse files Browse the repository at this point in the history
  • Loading branch information
brendapraggastis authored and gfyoung committed May 24, 2017
1 parent 7271f50 commit 4af9e45
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 0 deletions.
6 changes: 6 additions & 0 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1626,6 +1626,12 @@ def __init__(self, src, **kwds):

if self.usecols:
usecols = _evaluate_usecols(self.usecols, self.orig_names)

# see gh-14671
if (self.usecols_dtype == 'string' and
not set(usecols).issubset(self.orig_names)):
raise ValueError("Usecols do not match names.")

if len(self.names) > len(usecols):
self.names = [n for i, n in enumerate(self.names)
if (i in usecols or n in usecols)]
Expand Down
49 changes: 49 additions & 0 deletions pandas/tests/io/parser/usecols.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,3 +475,52 @@ def test_uneven_length_cols(self):
'C': [3, 5, 4, 3, 3, 7]})
df = self.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(df, expected)

def test_raise_on_usecols_names_mismatch(self):
    """Check that ``usecols`` labels missing from the column names raise.

    Valid selections must parse normally. A string ``usecols`` entry
    that matches neither the header row nor the user-supplied ``names``
    must raise ``ValueError``. The expected message depends on the
    engine under test: 'Usecols do not match names' for the 'c' engine,
    'is not in list' for any other engine.
    """
    # see gh-14671
    if self.engine == 'c':
        msg = 'Usecols do not match names'
    else:
        msg = 'is not in list'

    data = 'a,b,c,d\n1,2,3,4\n5,6,7,8'

    # Selecting every header label is valid and returns the full frame.
    result = self.read_csv(StringIO(data), usecols=['a', 'b', 'c', 'd'])
    expected = DataFrame({'a': [1, 5], 'b': [2, 6],
                          'c': [3, 7], 'd': [4, 8]})
    tm.assert_frame_equal(result, expected)

    # 'f' is not in the header, whether or not the number of requested
    # columns equals the number of columns in the data.
    for bad_usecols in (['a', 'b', 'c', 'f'], ['a', 'b', 'f']):
        with tm.assert_raises_regex(ValueError, msg):
            self.read_csv(StringIO(data), usecols=bad_usecols)

    names = ['A', 'B', 'C', 'D']

    # Replacing the header row with explicit names works without usecols.
    result = self.read_csv(StringIO(data), header=0, names=names)
    expected = DataFrame({'A': [1, 5], 'B': [2, 6],
                          'C': [3, 7], 'D': [4, 8]})
    tm.assert_frame_equal(result, expected)

    # NOTE(review): the cases below were already commented out; the
    # reason is not visible here -- confirm before re-enabling.
    #
    # usecols = ['A', 'C']
    # df = self.read_csv(StringIO(data), header=0,
    #                    names=names, usecols=usecols)
    # expected = DataFrame({'A': [1, 5], 'C': [3, 7]})
    # tm.assert_frame_equal(df, expected)
    #
    # usecols = [0, 2]
    # df = self.read_csv(StringIO(data), header=0,
    #                    names=names, usecols=usecols)
    # expected = DataFrame({'A': [1, 5], 'C': [3, 7]})
    # tm.assert_frame_equal(df, expected)

    # 'f' is not among the supplied names either. The first case passes
    # header=0 explicitly; the second leaves header at its default.
    mismatched_cases = (
        (['A', 'B', 'C', 'f'], {'header': 0}),
        (['A', 'B', 'f'], {}),
    )
    for bad_usecols, extra_kwargs in mismatched_cases:
        with tm.assert_raises_regex(ValueError, msg):
            self.read_csv(StringIO(data), names=names,
                          usecols=bad_usecols, **extra_kwargs)

0 comments on commit 4af9e45

Please sign in to comment.