Remove Categorical.name to make it more numpy.ndarray like

`name` was initially introduced to save the name of a Series/column during a groupby, when Categorical was mostly a helper for that. See here for the discussion: #10482. Closes: #10482
pandas-dev · Jul 25, 2015 · e4639ee · e4639ee
1 parent 5a9a9da
commit e4639ee
Show file tree

Hide file tree

Showing 11 changed files with 65 additions and 94 deletions.
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -270,6 +270,7 @@ Other API Changes
 - Enable serialization of lists and dicts to strings in ExcelWriter (:issue:`8188`)
 - Allow passing `kwargs` to the interpolation methods (:issue:`10378`).
 - Serialize metadata properties of subclasses of pandas objects (:issue:`10553`).
+- ``Categorical.name`` was removed to make `Categorical` more ``numpy.ndarray`` like. Use ``Series(cat, name="whatever")`` instead (:issue:`10482`).
 
 - ``NaT``'s methods now either raise ``ValueError``, or return ``np.nan`` or ``NaT`` (:issue:`9513`)
 ===========================     ==============================================================

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -147,9 +147,6 @@ class Categorical(PandasObject):
     ordered : boolean, (default False)
         Whether or not this categorical is treated as a ordered categorical. If not given,
         the resulting categorical will not be ordered.
-    name : str, optional
-        Name for the Categorical variable. If name is None, will attempt
-        to infer from values.
 
     Attributes
     ----------
@@ -159,8 +156,6 @@ class Categorical(PandasObject):
         The codes (integer positions, which point to the categories) of this categorical, read only.
     ordered : boolean
         Whether or not this Categorical is ordered.
-    name : string
-        The name of this Categorical.
 
     Raises
     ------
@@ -205,31 +200,31 @@ class Categorical(PandasObject):
     # For comparisons, so that numpy uses our implementation if the compare ops, which raise
     __array_priority__ = 1000
     _typ = 'categorical'
-    name = None
 
     def __init__(self, values, categories=None, ordered=False, name=None, fastpath=False,
                  levels=None):
 
         if fastpath:
             # fast path
             self._codes = _coerce_indexer_dtype(values, categories)
-            self.name = name
             self.categories = categories
             self._ordered = ordered
             return
 
-        if name is None:
-            name = getattr(values, 'name', None)
+        if not name is None:
+            msg = "the 'name' keyword is removed, use 'name' with consumers of the " \
+                  "categorical instead (e.g. 'Series(cat, name=\"something\")'"
+            warn(msg, UserWarning, stacklevel=2)
 
         # TODO: Remove after deprecation period in 2017/ after 0.18
         if not levels is None:
             warn("Creating a 'Categorical' with 'levels' is deprecated, use 'categories' instead",
-                 FutureWarning)
+                 FutureWarning, stacklevel=2)
             if categories is None:
                 categories = levels
             else:
                 raise ValueError("Cannot pass in both 'categories' and (deprecated) 'levels', "
-                                 "use only 'categories'")
+                                 "use only 'categories'", stacklevel=2)
 
         # sanitize input
         if is_categorical_dtype(values):
@@ -293,21 +288,20 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
             # TODO: check for old style usage. These warnings should be removes after 0.18/ in 2016
             if is_integer_dtype(values) and not is_integer_dtype(categories):
                 warn("Values and categories have different dtypes. Did you mean to use\n"
-                     "'Categorical.from_codes(codes, categories)'?", RuntimeWarning)
+                     "'Categorical.from_codes(codes, categories)'?", RuntimeWarning, stacklevel=2)
 
             if len(values) and is_integer_dtype(values) and (codes == -1).all():
                 warn("None of the categories were found in values. Did you mean to use\n"
-                     "'Categorical.from_codes(codes, categories)'?", RuntimeWarning)
+                     "'Categorical.from_codes(codes, categories)'?", RuntimeWarning, stacklevel=2)
 
         self.set_ordered(ordered or False, inplace=True)
         self.categories = categories
-        self.name = name
         self._codes = _coerce_indexer_dtype(codes, categories)
 
     def copy(self):
         """ Copy constructor. """
         return Categorical(values=self._codes.copy(),categories=self.categories,
-                           name=self.name, ordered=self.ordered, fastpath=True)
+                           ordered=self.ordered, fastpath=True)
 
     def astype(self, dtype):
         """ coerce this type to another dtype """
@@ -373,9 +367,12 @@ def from_codes(cls, codes, categories, ordered=False, name=None):
         ordered : boolean, (default False)
             Whether or not this categorical is treated as a ordered categorical. If not given,
             the resulting categorical will be unordered.
-        name : str, optional
-            Name for the Categorical variable.
         """
+        if not name is None:
+            msg = "the 'name' keyword is removed, use 'name' with consumers of the " \
+                  "categorical instead (e.g. 'Series(cat, name=\"something\")'"
+            warn(msg, UserWarning, stacklevel=2)
+
         try:
             codes = np.asarray(codes, np.int64)
         except:
@@ -386,7 +383,7 @@ def from_codes(cls, codes, categories, ordered=False, name=None):
         if len(codes) and (codes.max() >= len(categories) or codes.min() < -1):
             raise ValueError("codes need to be between -1 and len(categories)-1")
 
-        return Categorical(codes, categories=categories, ordered=ordered, name=name, fastpath=True)
+        return Categorical(codes, categories=categories, ordered=ordered, fastpath=True)
 
     _codes = None
 
@@ -416,8 +413,7 @@ def _get_labels(self):
 
         Deprecated, use .codes!
         """
-        import warnings
-        warnings.warn("'labels' is deprecated. Use 'codes' instead", FutureWarning)
+        warn("'labels' is deprecated. Use 'codes' instead", FutureWarning, stacklevel=3)
         return self.codes
 
     labels = property(fget=_get_labels, fset=_set_codes)
@@ -464,12 +460,12 @@ def _get_categories(self):
 
     def _set_levels(self, levels):
         """ set new levels (deprecated, use "categories") """
-        warn("Assigning to 'levels' is deprecated, use 'categories'", FutureWarning)
+        warn("Assigning to 'levels' is deprecated, use 'categories'", FutureWarning, stacklevel=3)
         self.categories = levels
 
     def _get_levels(self):
         """ Gets the levels (deprecated, use "categories") """
-        warn("Accessing 'levels' is deprecated, use 'categories'", FutureWarning)
+        warn("Accessing 'levels' is deprecated, use 'categories'", FutureWarning, stacklevel=3)
         return self.categories
 
     # TODO: Remove after deprecation period in 2017/ after 0.18
@@ -479,7 +475,8 @@ def _get_levels(self):
 
     def _set_ordered(self, value):
         """ Sets the ordered attribute to the boolean value """
-        warn("Setting 'ordered' directly is deprecated, use 'set_ordered'", FutureWarning)
+        warn("Setting 'ordered' directly is deprecated, use 'set_ordered'", FutureWarning,
+             stacklevel=3)
         self.set_ordered(value, inplace=True)
 
     def set_ordered(self, value, inplace=False):
@@ -1140,7 +1137,7 @@ def order(self, inplace=False, ascending=True, na_position='last'):
             return
         else:
             return Categorical(values=codes,categories=self.categories, ordered=self.ordered,
-                               name=self.name, fastpath=True)
+                               fastpath=True)
 
 
     def sort(self, inplace=True, ascending=True, na_position='last'):
@@ -1266,7 +1263,7 @@ def fillna(self, value=None, method=None, limit=None):
                 values[mask] = self.categories.get_loc(value)
 
         return Categorical(values, categories=self.categories, ordered=self.ordered,
-                           name=self.name, fastpath=True)
+                           fastpath=True)
 
     def take_nd(self, indexer, allow_fill=True, fill_value=None):
         """ Take the codes by the indexer, fill with the fill_value.
@@ -1280,7 +1277,7 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None):
 
         codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
         result = Categorical(codes, categories=self.categories, ordered=self.ordered,
-                             name=self.name, fastpath=True)
+                             fastpath=True)
         return result
 
     take = take_nd
@@ -1300,7 +1297,7 @@ def _slice(self, slicer):
 
         _codes = self._codes[slicer]
         return Categorical(values=_codes,categories=self.categories, ordered=self.ordered,
-                           name=self.name, fastpath=True)
+                           fastpath=True)
 
     def __len__(self):
         """The length of this Categorical."""
@@ -1313,9 +1310,8 @@ def __iter__(self):
     def _tidy_repr(self, max_vals=10, footer=True):
         """ a short repr displaying only max_vals and an optional (but default footer) """
         num = max_vals // 2
-        head = self[:num]._get_repr(length=False, name=False, footer=False)
+        head = self[:num]._get_repr(length=False, footer=False)
         tail = self[-(max_vals - num):]._get_repr(length=False,
-                                                  name=False,
                                                   footer=False)
 
         result = '%s, ..., %s' % (head[:-1], tail[1:])
@@ -1369,14 +1365,11 @@ def _repr_categories_info(self):
 
     def _repr_footer(self):
 
-        namestr = "Name: %s, " % self.name if self.name is not None else ""
-        return u('%sLength: %d\n%s') % (namestr,
-                                       len(self), self._repr_categories_info())
+        return u('Length: %d\n%s') % (len(self), self._repr_categories_info())
 
-    def _get_repr(self, name=False, length=True, na_rep='NaN', footer=True):
+    def _get_repr(self, length=True, na_rep='NaN', footer=True):
         from pandas.core import format as fmt
         formatter = fmt.CategoricalFormatter(self,
-                                             name=name,
                                              length=length,
                                              na_rep=na_rep,
                                              footer=footer)
@@ -1389,11 +1382,9 @@ def __unicode__(self):
         if len(self._codes) > _maxlen:
             result = self._tidy_repr(_maxlen)
         elif len(self._codes) > 0:
-            result = self._get_repr(length=len(self) > _maxlen,
-                                    name=True)
+            result = self._get_repr(length=len(self) > _maxlen)
         else:
-            result = '[], %s' % self._get_repr(name=True,
-                                               length=False,
+            result = '[], %s' % self._get_repr(length=False,
                                                footer=True,
                                                ).replace("\n",", ")
 
@@ -1562,8 +1553,7 @@ def mode(self):
         import pandas.hashtable as htable
         good = self._codes != -1
         result = Categorical(sorted(htable.mode_int64(_ensure_int64(self._codes[good]))),
-                             categories=self.categories,ordered=self.ordered, name=self.name,
-                             fastpath=True)
+                             categories=self.categories,ordered=self.ordered, fastpath=True)
         return result
 
     def unique(self):
@@ -1586,8 +1576,6 @@ def equals(self, other):
         """
         Returns True if categorical arrays are equal.
 
-        The name of the `Categorical` is not compared!
-
         Parameters
         ----------
         other : `Categorical`
@@ -1596,7 +1584,6 @@ def equals(self, other):
         -------
         are_equal : boolean
         """
-        # TODO: should this also test if name is equal?
         return self.is_dtype_equal(other) and np.array_equal(self._codes, other._codes)
 
     def is_dtype_equal(self, other):
@@ -1647,7 +1634,7 @@ def repeat(self, repeats):
         """
         codes = self._codes.repeat(repeats)
         return Categorical(values=codes, categories=self.categories,
-                           ordered=self.ordered, name=self.name, fastpath=True)
+                           ordered=self.ordered, fastpath=True)
 
 
 ##### The Series.cat accessor #####
@@ -1696,7 +1683,6 @@ def _delegate_method(self, name, *args, **kwargs):
         if not res is None:
             return Series(res, index=self.index)
 
-# TODO: remove levels after the deprecation period
 CategoricalAccessor._add_delegate_accessors(delegate=Categorical,
                                             accessors=["categories", "ordered"],
                                             typ='property')

diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -68,23 +68,16 @@
 class CategoricalFormatter(object):
 
     def __init__(self, categorical, buf=None, length=True,
-                 na_rep='NaN', name=False, footer=True):
+                 na_rep='NaN', footer=True):
         self.categorical = categorical
         self.buf = buf if buf is not None else StringIO(u(""))
-        self.name = name
         self.na_rep = na_rep
         self.length = length
         self.footer = footer
 
     def _get_footer(self):
         footer = ''
 
-        if self.name:
-            name = com.pprint_thing(self.categorical.name,
-                                    escape_chars=('\t', '\r', '\n'))
-            footer += ('Name: %s' % name if self.categorical.name is not None
-                       else '')
-
         if self.length:
             if footer:
                 footer += ', '

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1960,8 +1960,6 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
                 self._group_index = CategoricalIndex(Categorical.from_codes(np.arange(len(c)),
                                                      categories=c,
                                                      ordered=self.grouper.ordered))
-                if self.name is None:
-                    self.name = self.grouper.name
 
             # a passed Grouper like
             elif isinstance(self.grouper, Grouper):

diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -4414,7 +4414,7 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
         levels = [c.categories for c in cats]
         labels = [c.codes for c in cats]
         if names is None:
-            names = [c.name for c in cats]
+            names = [getattr(arr, "name", None) for arr in arrays]
 
         return MultiIndex(levels=levels, labels=labels,
                           sortorder=sortorder, names=names,

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -189,8 +189,6 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
             elif isinstance(data, Categorical):
                 if dtype is not None:
                     raise ValueError("cannot specify a dtype with a Categorical")
-                if name is None:
-                    name = data.name
             elif (isinstance(data, types.GeneratorType) or
                   (compat.PY3 and isinstance(data, map))):
                 data = list(data)