From edc6e9333425b2baaf566c9407acde37ecaa52ec Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 15 Sep 2018 18:10:27 +0100 Subject: [PATCH 1/2] remove duplicate file and create filepath fixture --- pandas/tests/io/conftest.py | 6 ++++++ .../io/{formats => }/data/unicode_series.csv | 0 pandas/tests/io/formats/test_format.py | 5 ++--- pandas/tests/io/parser/common.py | 7 +++---- pandas/tests/io/parser/data/unicode_series.csv | 18 ------------------ pandas/tests/io/parser/dtypes.py | 9 +++++---- 6 files changed, 16 insertions(+), 29 deletions(-) rename pandas/tests/io/{formats => }/data/unicode_series.csv (100%) delete mode 100644 pandas/tests/io/parser/data/unicode_series.csv diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index b0cdbe2b5bedb..222dc6a49ca1d 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -14,6 +14,12 @@ def jsonl_file(datapath): return datapath('io', 'parser', 'data', 'items.jsonl') +@pytest.fixture +def unicode_series_file(datapath): + """Path to unicode_series dataset""" + return datapath('io', 'data', 'unicode_series.csv') + + @pytest.fixture def salaries_table(datapath): """DataFrame with the salaries dataset""" diff --git a/pandas/tests/io/formats/data/unicode_series.csv b/pandas/tests/io/data/unicode_series.csv similarity index 100% rename from pandas/tests/io/formats/data/unicode_series.csv rename to pandas/tests/io/data/unicode_series.csv diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index c19f8e57f9ae7..344a2e937920e 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -954,9 +954,8 @@ def test_unicode_problem_decoding_as_ascii(self): dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})}) compat.text_type(dm.to_string()) - def test_string_repr_encoding(self, datapath): - filepath = datapath('io', 'formats', 'data', 'unicode_series.csv') - df = pd.read_csv(filepath, header=None, encoding='latin1') + def test_string_repr_encoding(self, unicode_series_file): + df = pd.read_csv(unicode_series_file, header=None, encoding='latin1') repr(df) repr(df[1]) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 9e871d27f0ce8..ac1c8c28e5846 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -765,10 +765,9 @@ def test_utf16_example(self, datapath): result = self.read_table(buf, encoding='utf-16') assert len(result) == 50 - def test_unicode_encoding(self, datapath): - pth = datapath('io', 'parser', 'data', 'unicode_series.csv') - - result = self.read_csv(pth, header=None, encoding='latin-1') + def test_unicode_encoding(self, unicode_series_file): + result = self.read_csv(unicode_series_file, + header=None, encoding='latin-1') result = result.set_index(0) got = result[1][1632] diff --git a/pandas/tests/io/parser/data/unicode_series.csv b/pandas/tests/io/parser/data/unicode_series.csv deleted file mode 100644 index 2485e149edb06..0000000000000 --- a/pandas/tests/io/parser/data/unicode_series.csv +++ /dev/null @@ -1,18 +0,0 @@ -1617,King of New York (1990) -1618,All Things Fair (1996) -1619,"Sixth Man, The (1997)" -1620,Butterfly Kiss (1995) -1621,"Paris, France (1993)" -1622,"Cérémonie, La (1995)" -1623,Hush (1998) -1624,Nightwatch (1997) -1625,Nobody Loves Me (Keiner liebt mich) (1994) -1626,"Wife, The (1995)" -1627,Lamerica (1994) -1628,Nico Icon (1995) -1629,"Silence of the Palace, The (Saimt el Qusur) (1994)" -1630,"Slingshot, The (1993)" -1631,Land and Freedom (Tierra y libertad) (1995) -1632,Á köldum klaka (Cold Fever) (1994) -1633,Etz Hadomim Tafus (Under the Domin Tree) (1994) -1634,Two Friends (1986) diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py index 8060ebf2fbcd4..d315e92f33ccc 100644 --- a/pandas/tests/io/parser/dtypes.py +++ b/pandas/tests/io/parser/dtypes.py @@ -125,13 +125,14 @@ def test_categorical_dtype_high_cardinality_numeric(self): np.sort(actual.a.cat.categories), ordered=True) tm.assert_frame_equal(actual, expected) - def test_categorical_dtype_encoding(self, datapath): + def test_categorical_dtype_encoding(self, datapath, unicode_series_file): # GH 10153 - pth = datapath('io', 'parser', 'data', 'unicode_series.csv') encoding = 'latin-1' - expected = self.read_csv(pth, header=None, encoding=encoding) + expected = self.read_csv(unicode_series_file, + header=None, encoding=encoding) expected[1] = Categorical(expected[1]) - actual = self.read_csv(pth, header=None, encoding=encoding, + actual = self.read_csv(unicode_series_file, + header=None, encoding=encoding, dtype={1: 'category'}) tm.assert_frame_equal(actual, expected) From b11a55eb9697f45de3004d57b28534eb2fe9e4b6 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 15 Sep 2018 21:23:09 +0100 Subject: [PATCH 2/2] change data file used by test_format.py only --- pandas/tests/io/conftest.py | 6 ------ pandas/tests/io/formats/test_format.py | 5 +++-- pandas/tests/io/parser/common.py | 7 ++++--- pandas/tests/io/{ => parser}/data/unicode_series.csv | 0 pandas/tests/io/parser/dtypes.py | 9 ++++----- 5 files changed, 11 insertions(+), 16 deletions(-) rename pandas/tests/io/{ => parser}/data/unicode_series.csv (100%) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 222dc6a49ca1d..b0cdbe2b5bedb 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -14,12 +14,6 @@ def jsonl_file(datapath): return datapath('io', 'parser', 'data', 'items.jsonl') -@pytest.fixture -def unicode_series_file(datapath): - """Path to unicode_series dataset""" - return datapath('io', 'data', 'unicode_series.csv') - - @pytest.fixture def salaries_table(datapath): """DataFrame with the salaries dataset""" diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 344a2e937920e..ffbc978b92ba5 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -954,8 +954,9 @@ def test_unicode_problem_decoding_as_ascii(self): dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})}) compat.text_type(dm.to_string()) - def test_string_repr_encoding(self, unicode_series_file): - df = pd.read_csv(unicode_series_file, header=None, encoding='latin1') + def test_string_repr_encoding(self, datapath): + filepath = datapath('io', 'parser', 'data', 'unicode_series.csv') + df = pd.read_csv(filepath, header=None, encoding='latin1') repr(df) repr(df[1]) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index ac1c8c28e5846..9e871d27f0ce8 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -765,9 +765,10 @@ def test_utf16_example(self, datapath): result = self.read_table(buf, encoding='utf-16') assert len(result) == 50 - def test_unicode_encoding(self, unicode_series_file): - result = self.read_csv(unicode_series_file, - header=None, encoding='latin-1') + def test_unicode_encoding(self, datapath): + pth = datapath('io', 'parser', 'data', 'unicode_series.csv') + + result = self.read_csv(pth, header=None, encoding='latin-1') result = result.set_index(0) got = result[1][1632] diff --git a/pandas/tests/io/data/unicode_series.csv b/pandas/tests/io/parser/data/unicode_series.csv similarity index 100% rename from pandas/tests/io/data/unicode_series.csv rename to pandas/tests/io/parser/data/unicode_series.csv diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py index d315e92f33ccc..8060ebf2fbcd4 100644 --- a/pandas/tests/io/parser/dtypes.py +++ b/pandas/tests/io/parser/dtypes.py @@ -125,14 +125,13 @@ def test_categorical_dtype_high_cardinality_numeric(self): np.sort(actual.a.cat.categories), ordered=True) tm.assert_frame_equal(actual, expected) - def test_categorical_dtype_encoding(self, datapath, unicode_series_file): + def test_categorical_dtype_encoding(self, datapath): # GH 10153 + pth = datapath('io', 'parser', 'data', 'unicode_series.csv') encoding = 'latin-1' - expected = self.read_csv(unicode_series_file, - header=None, encoding=encoding) + expected = self.read_csv(pth, header=None, encoding=encoding) expected[1] = Categorical(expected[1]) - actual = self.read_csv(unicode_series_file, - header=None, encoding=encoding, + actual = self.read_csv(pth, header=None, encoding=encoding, dtype={1: 'category'}) tm.assert_frame_equal(actual, expected)