From 4f3d833d7d4a618e9714ac4efe03ab4f2fdce912 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 5 Sep 2016 22:06:44 -0400 Subject: [PATCH] TST: Make encoded sep check more locale sensitive Closes gh-14140. --- pandas/io/parsers.py | 21 +++++++++++++-------- pandas/io/tests/parser/test_unsupported.py | 4 ---- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3bd8579d456d3..93c431531355a 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -800,17 +800,22 @@ def _clean_options(self, options, engine): " different from '\s+' are"\ " interpreted as regex)" engine = 'python' - - elif len(sep.encode(encoding)) > 1: - if engine not in ('python', 'python-fwf'): - fallback_reason = "the separator encoded in {encoding}"\ - " is > 1 char long, and the 'c' engine"\ - " does not support such separators".format( - encoding=encoding) - engine = 'python' elif delim_whitespace: if 'python' in engine: result['delimiter'] = '\s+' + elif sep is not None: + encodeable = True + try: + if len(sep.encode(encoding)) > 1: + encodeable = False + except UnicodeDecodeError: + encodeable = False + if not encodeable and engine not in ('python', 'python-fwf'): + fallback_reason = "the separator encoded in {encoding}" \ + " is > 1 char long, and the 'c' engine" \ + " does not support such separators".format( + encoding=encoding) + engine = 'python' if fallback_reason and engine_specified: raise ValueError(fallback_reason) diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py index 0bfb8b17349cf..ef8f7967193ff 100644 --- a/pandas/io/tests/parser/test_unsupported.py +++ b/pandas/io/tests/parser/test_unsupported.py @@ -60,10 +60,6 @@ def test_c_engine(self): sep=None, delim_whitespace=False) with tm.assertRaisesRegexp(ValueError, msg): read_table(StringIO(data), engine='c', sep='\s') - - # GH 14120, skipping as failing when locale is set - # with tm.assertRaisesRegexp(ValueError, 
msg): - # read_table(StringIO(data), engine='c', sep='§') with tm.assertRaisesRegexp(ValueError, msg): read_table(StringIO(data), engine='c', skipfooter=1)