From b8e36ac55a719dc57758ef88c40a7a3fde6c9c9f Mon Sep 17 00:00:00 2001
From: Kevin Sheppard <kevin.k.sheppard@gmail.com>
Date: Thu, 24 Aug 2017 17:06:11 +0100
Subject: [PATCH] BUG: Set index when reading stata file

Ensures the index is set, when requested, when reading a Stata dta file

closes #16342
---
 doc/source/whatsnew/v0.21.0.txt |  1 +
 pandas/io/stata.py              | 11 ++++++++---
 pandas/tests/io/test_stata.py   |  8 ++++++++
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index fcadd26156b1d4..45e53a87ba8175 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -369,6 +369,7 @@ I/O
 - Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`).
 - Bug in :func:`read_csv` when called with a single-element list ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`)
 - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`)
+- Bug in :func:`read_stata` where the column named by the ``index`` argument was not set as the index (:issue:`16342`)
 - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`)
 
 Plotting
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 253ed03c25db94..fac506f3cb6c40 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -1486,6 +1486,8 @@ def read(self, nrows=None, convert_dates=None,
             columns = self._columns
         if order_categoricals is None:
             order_categoricals = self._order_categoricals
+        if index is None:
+            index = self._index
 
         if nrows is None:
             nrows = self.nobs
@@ -1526,7 +1528,7 @@ def read(self, nrows=None, convert_dates=None,
         if len(data) == 0:
             data = DataFrame(columns=self.varlist, index=index)
         else:
-            data = DataFrame.from_records(data, index=index)
+            data = DataFrame.from_records(data)
             data.columns = self.varlist
 
         # If index is not specified, use actual row number rather than
@@ -1553,7 +1555,7 @@ def read(self, nrows=None, convert_dates=None,
         cols_ = np.where(self.dtyplist)[0]
 
         # Convert columns (if needed) to match input type
-        index = data.index
+        ix = data.index
         requires_type_conversion = False
         data_formatted = []
         for i in cols_:
@@ -1563,7 +1565,7 @@ def read(self, nrows=None, convert_dates=None,
                 if dtype != np.dtype(object) and dtype != self.dtyplist[i]:
                     requires_type_conversion = True
                     data_formatted.append(
-                        (col, Series(data[col], index, self.dtyplist[i])))
+                        (col, Series(data[col], ix, self.dtyplist[i])))
                 else:
                     data_formatted.append((col, data[col]))
         if requires_type_conversion:
@@ -1606,6 +1608,9 @@ def read(self, nrows=None, convert_dates=None,
             if convert:
                 data = DataFrame.from_items(retyped_data)
 
+        if index is not None:
+            data = data.set_index(data.pop(index))
+
         return data
 
     def _do_convert_missing(self, data, convert_missing):
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index a414928d318c42..e7f67fd9ac8c14 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -1309,3 +1309,11 @@ def test_value_labels_iterator(self, write_index):
             dta_iter = pd.read_stata(path, iterator=True)
             value_labels = dta_iter.value_labels()
         assert value_labels == {'A': {0: 'A', 1: 'B', 2: 'C', 3: 'E'}}
+
+    def test_set_index(self):
+        df = tm.makeDataFrame()
+        df.index.name = 'index'
+        with tm.ensure_clean() as path:
+            df.to_stata(path)
+            reread = pd.read_stata(path, index='index')
+        tm.assert_frame_equal(df, reread)