From edc6e9333425b2baaf566c9407acde37ecaa52ec Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Sat, 15 Sep 2018 18:10:27 +0100
Subject: [PATCH 1/2] remove duplicate file and create filepath fixture

---
 pandas/tests/io/conftest.py                    |  6 ++++++
 .../io/{formats => }/data/unicode_series.csv   |  0
 pandas/tests/io/formats/test_format.py         |  5 ++---
 pandas/tests/io/parser/common.py               |  7 +++----
 pandas/tests/io/parser/data/unicode_series.csv | 18 ------------------
 pandas/tests/io/parser/dtypes.py               |  9 +++++----
 6 files changed, 16 insertions(+), 29 deletions(-)
 rename pandas/tests/io/{formats => }/data/unicode_series.csv (100%)
 delete mode 100644 pandas/tests/io/parser/data/unicode_series.csv

diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
index b0cdbe2b5bedb..222dc6a49ca1d 100644
--- a/pandas/tests/io/conftest.py
+++ b/pandas/tests/io/conftest.py
@@ -14,6 +14,12 @@ def jsonl_file(datapath):
     return datapath('io', 'parser', 'data', 'items.jsonl')
 
 
+@pytest.fixture
+def unicode_series_file(datapath):
+    """Path to unicode_series dataset"""
+    return datapath('io', 'data', 'unicode_series.csv')
+
+
 @pytest.fixture
 def salaries_table(datapath):
     """DataFrame with the salaries dataset"""
diff --git a/pandas/tests/io/formats/data/unicode_series.csv b/pandas/tests/io/data/unicode_series.csv
similarity index 100%
rename from pandas/tests/io/formats/data/unicode_series.csv
rename to pandas/tests/io/data/unicode_series.csv
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index c19f8e57f9ae7..344a2e937920e 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -954,9 +954,8 @@ def test_unicode_problem_decoding_as_ascii(self):
         dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})})
         compat.text_type(dm.to_string())
 
-    def test_string_repr_encoding(self, datapath):
-        filepath = datapath('io', 'formats', 'data', 'unicode_series.csv')
-        df = pd.read_csv(filepath, header=None, encoding='latin1')
+    def test_string_repr_encoding(self, unicode_series_file):
+        df = pd.read_csv(unicode_series_file, header=None, encoding='latin1')
         repr(df)
         repr(df[1])
 
diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index 9e871d27f0ce8..ac1c8c28e5846 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -765,10 +765,9 @@ def test_utf16_example(self, datapath):
             result = self.read_table(buf, encoding='utf-16')
             assert len(result) == 50
 
-    def test_unicode_encoding(self, datapath):
-        pth = datapath('io', 'parser', 'data', 'unicode_series.csv')
-
-        result = self.read_csv(pth, header=None, encoding='latin-1')
+    def test_unicode_encoding(self, unicode_series_file):
+        result = self.read_csv(unicode_series_file,
+                               header=None, encoding='latin-1')
         result = result.set_index(0)
 
         got = result[1][1632]
diff --git a/pandas/tests/io/parser/data/unicode_series.csv b/pandas/tests/io/parser/data/unicode_series.csv
deleted file mode 100644
index 2485e149edb06..0000000000000
--- a/pandas/tests/io/parser/data/unicode_series.csv
+++ /dev/null
@@ -1,18 +0,0 @@
-1617,King of New York (1990)
-1618,All Things Fair (1996)
-1619,"Sixth Man, The (1997)"
-1620,Butterfly Kiss (1995)
-1621,"Paris, France (1993)"
-1622,"Cérémonie, La (1995)"
-1623,Hush (1998)
-1624,Nightwatch (1997)
-1625,Nobody Loves Me (Keiner liebt mich) (1994)
-1626,"Wife, The (1995)"
-1627,Lamerica (1994)
-1628,Nico Icon (1995)
-1629,"Silence of the Palace, The (Saimt el Qusur) (1994)"
-1630,"Slingshot, The (1993)"
-1631,Land and Freedom (Tierra y libertad) (1995)
-1632,Á köldum klaka (Cold Fever) (1994)
-1633,Etz Hadomim Tafus (Under the Domin Tree) (1994)
-1634,Two Friends (1986)
diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py
index 8060ebf2fbcd4..d315e92f33ccc 100644
--- a/pandas/tests/io/parser/dtypes.py
+++ b/pandas/tests/io/parser/dtypes.py
@@ -125,13 +125,14 @@ def test_categorical_dtype_high_cardinality_numeric(self):
             np.sort(actual.a.cat.categories), ordered=True)
         tm.assert_frame_equal(actual, expected)
 
-    def test_categorical_dtype_encoding(self, datapath):
+    def test_categorical_dtype_encoding(self, datapath, unicode_series_file):
         # GH 10153
-        pth = datapath('io', 'parser', 'data', 'unicode_series.csv')
         encoding = 'latin-1'
-        expected = self.read_csv(pth, header=None, encoding=encoding)
+        expected = self.read_csv(unicode_series_file,
+                                 header=None, encoding=encoding)
         expected[1] = Categorical(expected[1])
-        actual = self.read_csv(pth, header=None, encoding=encoding,
+        actual = self.read_csv(unicode_series_file,
+                               header=None, encoding=encoding,
                                dtype={1: 'category'})
         tm.assert_frame_equal(actual, expected)
 

From b11a55eb9697f45de3004d57b28534eb2fe9e4b6 Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Sat, 15 Sep 2018 21:23:09 +0100
Subject: [PATCH 2/2] drop fixture; only repoint test_format.py at parser data file

---
 pandas/tests/io/conftest.py                          | 6 ------
 pandas/tests/io/formats/test_format.py               | 5 +++--
 pandas/tests/io/parser/common.py                     | 7 ++++---
 pandas/tests/io/{ => parser}/data/unicode_series.csv | 0
 pandas/tests/io/parser/dtypes.py                     | 9 ++++-----
 5 files changed, 11 insertions(+), 16 deletions(-)
 rename pandas/tests/io/{ => parser}/data/unicode_series.csv (100%)

diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
index 222dc6a49ca1d..b0cdbe2b5bedb 100644
--- a/pandas/tests/io/conftest.py
+++ b/pandas/tests/io/conftest.py
@@ -14,12 +14,6 @@ def jsonl_file(datapath):
     return datapath('io', 'parser', 'data', 'items.jsonl')
 
 
-@pytest.fixture
-def unicode_series_file(datapath):
-    """Path to unicode_series dataset"""
-    return datapath('io', 'data', 'unicode_series.csv')
-
-
 @pytest.fixture
 def salaries_table(datapath):
     """DataFrame with the salaries dataset"""
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index 344a2e937920e..ffbc978b92ba5 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -954,8 +954,9 @@ def test_unicode_problem_decoding_as_ascii(self):
         dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})})
         compat.text_type(dm.to_string())
 
-    def test_string_repr_encoding(self, unicode_series_file):
-        df = pd.read_csv(unicode_series_file, header=None, encoding='latin1')
+    def test_string_repr_encoding(self, datapath):
+        filepath = datapath('io', 'parser', 'data', 'unicode_series.csv')
+        df = pd.read_csv(filepath, header=None, encoding='latin1')
         repr(df)
         repr(df[1])
 
diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index ac1c8c28e5846..9e871d27f0ce8 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -765,9 +765,10 @@ def test_utf16_example(self, datapath):
             result = self.read_table(buf, encoding='utf-16')
             assert len(result) == 50
 
-    def test_unicode_encoding(self, unicode_series_file):
-        result = self.read_csv(unicode_series_file,
-                               header=None, encoding='latin-1')
+    def test_unicode_encoding(self, datapath):
+        pth = datapath('io', 'parser', 'data', 'unicode_series.csv')
+
+        result = self.read_csv(pth, header=None, encoding='latin-1')
         result = result.set_index(0)
 
         got = result[1][1632]
diff --git a/pandas/tests/io/data/unicode_series.csv b/pandas/tests/io/parser/data/unicode_series.csv
similarity index 100%
rename from pandas/tests/io/data/unicode_series.csv
rename to pandas/tests/io/parser/data/unicode_series.csv
diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py
index d315e92f33ccc..8060ebf2fbcd4 100644
--- a/pandas/tests/io/parser/dtypes.py
+++ b/pandas/tests/io/parser/dtypes.py
@@ -125,14 +125,13 @@ def test_categorical_dtype_high_cardinality_numeric(self):
             np.sort(actual.a.cat.categories), ordered=True)
         tm.assert_frame_equal(actual, expected)
 
-    def test_categorical_dtype_encoding(self, datapath, unicode_series_file):
+    def test_categorical_dtype_encoding(self, datapath):
         # GH 10153
+        pth = datapath('io', 'parser', 'data', 'unicode_series.csv')
         encoding = 'latin-1'
-        expected = self.read_csv(unicode_series_file,
-                                 header=None, encoding=encoding)
+        expected = self.read_csv(pth, header=None, encoding=encoding)
         expected[1] = Categorical(expected[1])
-        actual = self.read_csv(unicode_series_file,
-                               header=None, encoding=encoding,
+        actual = self.read_csv(pth, header=None, encoding=encoding,
                                dtype={1: 'category'})
         tm.assert_frame_equal(actual, expected)