Skip to content

Commit

Permalink
TST: Make encoded sep check more locale sensitive
Browse files: browse the repository at this point in the history
Closes gh-14140.
  • Loading branch information
gfyoung committed Sep 8, 2016
1 parent 02df7b6 commit 4f3d833
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
21 changes: 13 additions & 8 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,17 +800,22 @@ def _clean_options(self, options, engine):
" different from '\s+' are"\
" interpreted as regex)"
engine = 'python'

elif len(sep.encode(encoding)) > 1:
if engine not in ('python', 'python-fwf'):
fallback_reason = "the separator encoded in {encoding}"\
" is > 1 char long, and the 'c' engine"\
" does not support such separators".format(
encoding=encoding)
engine = 'python'
elif delim_whitespace:
if 'python' in engine:
result['delimiter'] = '\s+'
elif sep is not None:
encodeable = True
try:
if len(sep.encode(encoding)) > 1:
encodeable = False
except UnicodeDecodeError:
encodeable = False
if not encodeable and engine not in ('python', 'python-fwf'):
fallback_reason = "the separator encoded in {encoding}" \
" is > 1 char long, and the 'c' engine" \
" does not support such separators".format(
encoding=encoding)
engine = 'python'

if fallback_reason and engine_specified:
raise ValueError(fallback_reason)
Expand Down
4 changes: 0 additions & 4 deletions pandas/io/tests/parser/test_unsupported.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,6 @@ def test_c_engine(self):
sep=None, delim_whitespace=False)
with tm.assertRaisesRegexp(ValueError, msg):
read_table(StringIO(data), engine='c', sep='\s')

# GH 14120, skipping as failing when locale is set
# with tm.assertRaisesRegexp(ValueError, msg):
# read_table(StringIO(data), engine='c', sep='§')
with tm.assertRaisesRegexp(ValueError, msg):
read_table(StringIO(data), engine='c', skipfooter=1)

Expand Down

0 comments on commit 4f3d833

Please sign in to comment.