pandas-dev · jreback · Feb 24, 2019 · Jan 28, 2019 · Jan 28, 2019 · Jan 29, 2019
diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst
@@ -76,6 +76,7 @@ Bug Fixes
 **Reshaping**
 
 - Bug in :meth:`DataFrame.groupby` with :class:`Grouper` when there is a time change (DST) and grouping frequency is ``'1d'`` (:issue:`24972`)
+- Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` (:issue:`24969`)
 
 **Visualization**
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4143,13 +4143,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                    'array, or a list containing only valid column keys and '
                    'one-dimensional arrays.')
 
-        if (is_scalar(keys) or isinstance(keys, tuple)
-                or isinstance(keys, (ABCIndexClass, ABCSeries, np.ndarray))):
-            # make sure we have a container of keys/arrays we can iterate over
-            # tuples can appear as valid column keys!
+        if not isinstance(keys, list):
             keys = [keys]
-        elif not isinstance(keys, list):
-            raise ValueError(err_msg)
 
         missing = []
         for col in keys:
@@ -4158,10 +4153,29 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                 # tuples are always considered keys, never as list-likes
                 if col not in self:
                     missing.append(col)
-            elif (not isinstance(col, (ABCIndexClass, ABCSeries,
-                                       np.ndarray, list))
-                  or getattr(col, 'ndim', 1) > 1):
-                raise ValueError(err_msg)
+            elif isinstance(col, (ABCIndexClass, ABCSeries,
+                                  np.ndarray, list)):
+                # arrays are fine as long as they are one-dimensional
+                if getattr(col, 'ndim', 1) > 1:
+                    raise ValueError(err_msg)
+            elif is_list_like(col) and not hasattr(col, '__len__'):
+                # iterators/generators are hashable, but looking them up as
+                # keys would end in a misleading KeyError, so raise TypeError;
+                # we identify them as list-likes that lack __len__.
+                # hashable listlikes with __len__ get tested as keys below.
+                tipo = type(col)
+                raise TypeError(err_msg + ' Received column of '
+                                'type {}'.format(tipo))
+            else:
+                # everything else gets tried as a key; see GH 24969
+                try:
+                    found = col in self.columns
+                except TypeError:
+                    tipo = type(col)
+                    raise TypeError(err_msg + ' Received column of '
+                                    'type {}'.format(tipo))
+                else:
+                    if not found:
+                        missing.append(col)
 
         if missing:
             raise KeyError('{}'.format(missing))

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
@@ -255,21 +255,126 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append):
 
     @pytest.mark.parametrize('append', [True, False])
     @pytest.mark.parametrize('drop', [True, False])
-    @pytest.mark.parametrize('box', [set, iter])
+    @pytest.mark.parametrize('box', [set, iter, lambda x: (y for y in x)],
+                             ids=['set', 'iter', 'generator'])
     def test_set_index_raise_on_type(self, frame_of_index_cols, box,
                                      drop, append):
+        # A column wrapped in a set, an iterator or a generator must be
+        # rejected with a TypeError, both when it is passed as ``keys``
+        # directly and when it is one element of a list of keys.
         df = frame_of_index_cols
 
         msg = 'The parameter "keys" may be a column key, .*'
-        # forbidden type, e.g. set/tuple/iter
-        with pytest.raises(ValueError, match=msg):
+        # forbidden type, e.g. set/iter/generator
+        with pytest.raises(TypeError, match=msg):
             df.set_index(box(df['A']), drop=drop, append=append)
 
-        # forbidden type in list, e.g. set/tuple/iter
-        with pytest.raises(ValueError, match=msg):
+        # forbidden type in list, e.g. set/iter/generator
+        with pytest.raises(TypeError, match=msg):
             df.set_index(['A', df['A'], box(df['A'])],
                          drop=drop, append=append)
 
+    def test_set_index_custom_label_type(self):
+        # GH 24969
+        # Instances of a plain user-defined class serve as column labels
+        # here; set_index must accept them as keys, whether passed directly
+        # or wrapped in a list, and raise KeyError for a label that is not
+        # among the columns.
+
+        class Thing(object):
+            def __init__(self, name, color):
+                self.name = name
+                self.color = color
+
+            def __str__(self):
+                return "<Thing %r>" % (self.name,)
+
+            # the KeyError message matched below shows the label's repr, so
+            # reuse the readable str form for it
+            __repr__ = __str__
+
+        thing1 = Thing('One', 'red')
+        thing2 = Thing('Two', 'blue')
+        df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
+        # the thing2 column becomes the index (named thing2); only the
+        # thing1 column remains
+        expected = DataFrame({thing1: [0, 1]},
+                             index=Index([2, 3], name=thing2))
+
+        # use custom label directly
+        result = df.set_index(thing2)
+        tm.assert_frame_equal(result, expected)
+
+        # custom label wrapped in list
+        result = df.set_index([thing2])
+        tm.assert_frame_equal(result, expected)
+
+        # missing key: thing3 is a new instance that is not a column of df
+        thing3 = Thing('Three', 'pink')
+        msg = "<Thing 'Three'>"
+        with pytest.raises(KeyError, match=msg):
+            # missing label directly
+            df.set_index(thing3)
+
+        with pytest.raises(KeyError, match=msg):
+            # missing label in list
+            df.set_index([thing3])
+
+    def test_set_index_custom_label_hashable_iterable(self):
+        # GH 24969
+        # A label that is both hashable and iterable must still be treated
+        # as a column key by set_index, directly or wrapped in a list, and
+        # must raise KeyError when it is not among the columns.
+
+        # the real-world case discussed in GH 24984 involved shapely.geometry
+        # objects (e.g. a collection of Points), which can be both hashable
+        # and iterable; a frozenset subclass is the stand-in for testing here
+
+        class Thing(frozenset):
+            # need to stabilize repr for KeyError (due to random order in sets)
+            def __repr__(self):
+                tmp = sorted(list(self))
+                # double curly brace prints one brace in format string
+                return "frozenset({{{}}})".format(', '.join(map(repr, tmp)))
+
+        thing1 = Thing(['One', 'red'])
+        thing2 = Thing(['Two', 'blue'])
+        df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
+        # the thing2 column becomes the index (named thing2); only the
+        # thing1 column remains
+        expected = DataFrame({thing1: [0, 1]},
+                             index=Index([2, 3], name=thing2))
+
+        # use custom label directly
+        result = df.set_index(thing2)
+        tm.assert_frame_equal(result, expected)
+
+        # custom label wrapped in list
+        result = df.set_index([thing2])
+        tm.assert_frame_equal(result, expected)
+
+        # missing key: the expected message is the sorted repr defined above,
+        # with the regex metacharacters escaped
+        thing3 = Thing(['Three', 'pink'])
+        msg = r"frozenset\(\{'Three', 'pink'\}\)"
+        with pytest.raises(KeyError, match=msg):
+            # missing label directly
+            df.set_index(thing3)
+
+        with pytest.raises(KeyError, match=msg):
+            # missing label in list
+            df.set_index([thing3])
+
+    def test_set_index_custom_label_type_raises(self):
+        # GH 24969
+        # An unhashable custom object passed to set_index, directly or in a
+        # list, must raise a TypeError carrying the ``keys`` parameter
+        # message rather than being looked up as a column key.
+
+        # purposefully inherit from something unhashable
+        class Thing(set):
+            def __init__(self, name, color):
+                self.name = name
+                self.color = color
+
+            def __str__(self):
+                return "<Thing %r>" % (self.name,)
+
+        thing1 = Thing('One', 'red')
+        thing2 = Thing('Two', 'blue')
+        # labels are supplied through ``columns=`` (unhashable objects could
+        # not be used as dict keys)
+        df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2])
+
+        msg = 'The parameter "keys" may be a column key, .*'
+
+        with pytest.raises(TypeError, match=msg):
+            # use custom label directly
+            df.set_index(thing2)
+
+        with pytest.raises(TypeError, match=msg):
+            # custom label wrapped in list
+            df.set_index([thing2])
+
     def test_construction_with_categorical_index(self):
         ci = tm.makeCategoricalIndex(10)
         ci.name = 'B'