diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 835c4cc9d4ab3..608e2c8e72ded 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -503,6 +503,47 @@ method, allowing you to permute the hierarchical index levels in one step: df[:5].reorder_levels([1,0], axis=0) +.. _advanced.index_names: + +Renaming names of an ``Index`` or ``MultiIndex`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~DataFrame.rename` method is used to rename the labels of a +``MultiIndex``, and is typically used to rename the columns of a ``DataFrame``. +The ``columns`` argument of ``rename`` allows a dictionary to be specified +that includes only the columns you wish to rename. + +.. ipython:: python + + df.rename(columns={0: "col0", 1: "col1"}) + +This method can also be used to rename specific labels of the main index +of the ``DataFrame``. + +.. ipython:: python + + df.rename(index={"one" : "two", "y" : "z"}) + +The :meth:`~DataFrame.rename_axis` method is used to rename the name of an +``Index`` or ``MultiIndex``. In particular, the names of the levels of a +``MultiIndex`` can be specified, which is useful if ``reset_index()`` is later +used to move the values from the ``MultiIndex`` to a column. + +.. ipython:: python + + df.rename_axis(index=['abc', 'def']) + +Note that the columns of a ``DataFrame`` are an index, so that using +``rename_axis`` with the ``columns`` argument will change the name of that +index. + +.. ipython:: python + + df.rename_axis(columns="Cols").columns + +Both ``rename`` and ``rename_axis`` support specifying a dictionary, +``Series`` or a mapping function to map labels/names to new values. + Sorting a ``MultiIndex`` ------------------------ diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 6eeb97349100a..16ab345fd1744 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1466,8 +1466,21 @@ for altering the ``Series.name`` attribute. .. 
_basics.rename_axis: -The Panel class has a related :meth:`~Panel.rename_axis` class which can rename -any of its three axes. +.. versionadded:: 0.24.0 + +The methods :meth:`~DataFrame.rename_axis` and :meth:`~Series.rename_axis` +allow specific names of a ``MultiIndex`` to be changed (as opposed to the +labels). + +.. ipython:: python + + df = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], + 'y': [10, 20, 30, 40, 50, 60]}, + index=pd.MultiIndex.from_product([['a', 'b', 'c'], [1, 2]], + names=['let', 'num'])) + df + df.rename_axis(index={'let': 'abc'}) + df.rename_axis(index=str.upper) .. _basics.iteration: diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index ba164a82c162e..ddf5fffb1d80b 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -180,6 +180,29 @@ array, but rather an ``ExtensionArray``: This is the same behavior as ``Series.values`` for categorical data. See :ref:`whatsnew_0240.api_breaking.interval_values` for more. +.. _whatsnew_0240.enhancements.rename_axis: + +Renaming names in a MultiIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.rename_axis` now supports ``index`` and ``columns`` arguments +and :func:`Series.rename_axis` supports ``index`` argument (:issue:`19978`) + +This change allows a dictionary to be passed so that some of the names +of a ``MultiIndex`` can be changed. + +Example: + +.. ipython:: python + + mi = pd.MultiIndex.from_product([list('AB'), list('CD'), list('EF')], + names=['AB', 'CD', 'EF']) + df = pd.DataFrame([i for i in range(len(mi))], index=mi, columns=['N']) + df + df.rename_axis(index={'CD': 'New'}) + +See the :ref:`advanced docs on renaming<advanced.index_names>` for more details. + .. 
_whatsnew_0240.enhancements.other: Other Enhancements diff --git a/pandas/core/common.py b/pandas/core/common.py index f6e40faa79740..0a82dd8636888 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -454,3 +454,21 @@ def _pipe(obj, func, *args, **kwargs): return func(*args, **kwargs) else: return func(obj, *args, **kwargs) + + +def _get_rename_function(mapper): + """ + Returns a function that will map names/labels, dependent if mapper + is a dict, Series or just a function. + """ + if isinstance(mapper, (compat.Mapping, ABCSeries)): + + def f(x): + if x in mapper: + return mapper[x] + else: + return x + else: + f = mapper + + return f diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 693cd14c8ca1d..db10494f0724d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -53,7 +53,8 @@ isidentifier, set_function_name, cPickle as pkl) from pandas.core.ops import _align_method_FRAME import pandas.core.nanops as nanops -from pandas.util._decorators import Appender, Substitution +from pandas.util._decorators import (Appender, Substitution, + rewrite_axis_style_signature) from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs from pandas.core import config @@ -1079,20 +1080,6 @@ def rename(self, *args, **kwargs): if com.count_not_none(*axes.values()) == 0: raise TypeError('must pass an index to rename') - # renamer function if passed a dict - def _get_rename_function(mapper): - if isinstance(mapper, (dict, ABCSeries)): - - def f(x): - if x in mapper: - return mapper[x] - else: - return x - else: - f = mapper - - return f - self._consolidate_inplace() result = self if inplace else self.copy(deep=copy) @@ -1101,7 +1088,7 @@ def f(x): v = axes.get(self._AXIS_NAMES[axis]) if v is None: continue - f = _get_rename_function(v) + f = com._get_rename_function(v) baxis = self._get_block_manager_axis(axis) if level is not None: @@ -1115,16 +1102,28 @@ def f(x): else: return result.__finalize__(self) - def 
rename_axis(self, mapper, axis=0, copy=True, inplace=False): + @rewrite_axis_style_signature('mapper', [('copy', True), + ('inplace', False)]) + def rename_axis(self, mapper=None, **kwargs): """ - Alter the name of the index or columns. + Alter the name of the index or name of Index object that is the + columns. Parameters ---------- mapper : scalar, list-like, optional - Value to set as the axis name attribute. - axis : {0 or 'index', 1 or 'columns'}, default 0 - The index or the name of the axis. + Value to set as the axis name attribute. + index, columns : scalar, list-like, dict-like or function, optional + dict-like or function transformations to apply to + that axis' values. + + Use either ``mapper`` and ``axis`` to + specify the axis to target with ``mapper``, or ``index`` + and/or ``columns``. + + .. versionchanged:: 0.24.0 + + axis : int or string, default 0 copy : boolean, default True Also copy underlying data. inplace : boolean, default False @@ -1143,6 +1142,23 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): deprecated and will be removed in a future version. Use ``rename`` instead. + ``DataFrame.rename_axis`` supports two calling conventions + + * ``(index=index_mapper, columns=columns_mapper, ...)`` + * ``(mapper, axis={'index', 'columns'}, ...)`` + + The first calling convention will only modify the names of + the index and/or the names of the Index object that is the columns. + In this case, the parameter ``copy`` is ignored. + + The second calling convention will modify the names of the + corresponding index if mapper is a list or a scalar. + However, if mapper is dict-like or a function, it will use the + deprecated behavior of modifying the axis *labels*. + + We *highly* recommend using keyword arguments to clarify your + intent. 
+ See Also -------- pandas.Series.rename : Alter Series index labels or name @@ -1176,20 +1192,94 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): 0 1 4 1 2 5 2 3 6 - """ + + >>> mi = pd.MultiIndex.from_product([['a', 'b', 'c'], [1, 2]], + ... names=['let','num']) + >>> df = pd.DataFrame({'x': [i for i in range(len(mi))], + ... 'y' : [i*10 for i in range(len(mi))]}, + ... index=mi) + >>> df.rename_axis(index={'num' : 'n'}) + x y + let n + a 1 0 0 + 2 1 10 + b 1 2 20 + 2 3 30 + c 1 4 40 + 2 5 50 + + >>> cdf = df.rename_axis(columns='col') + >>> cdf + col x y + let num + a 1 0 0 + 2 1 10 + b 1 2 20 + 2 3 30 + c 1 4 40 + 2 5 50 + + >>> cdf.rename_axis(columns=str.upper) + COL x y + let num + a 1 0 0 + 2 1 10 + b 1 2 20 + 2 3 30 + c 1 4 40 + 2 5 50 + + """ + axes, kwargs = self._construct_axes_from_arguments((), kwargs) + copy = kwargs.pop('copy', True) + inplace = kwargs.pop('inplace', False) + axis = kwargs.pop('axis', 0) + if axis is not None: + axis = self._get_axis_number(axis) + + if kwargs: + raise TypeError('rename_axis() got an unexpected keyword ' + 'argument "{0}"'.format(list(kwargs.keys())[0])) + inplace = validate_bool_kwarg(inplace, 'inplace') - non_mapper = is_scalar(mapper) or (is_list_like(mapper) and not - is_dict_like(mapper)) - if non_mapper: - return self._set_axis_name(mapper, axis=axis, inplace=inplace) + + if (mapper is not None): + # Use v0.23 behavior if a scalar or list + non_mapper = is_scalar(mapper) or (is_list_like(mapper) and not + is_dict_like(mapper)) + if non_mapper: + return self._set_axis_name(mapper, axis=axis, inplace=inplace) + else: + # Deprecated (v0.21) behavior is if mapper is specified, + # and not a list or scalar, then call rename + msg = ("Using 'rename_axis' to alter labels is deprecated. 
" + "Use '.rename' instead") + warnings.warn(msg, FutureWarning, stacklevel=3) + axis = self._get_axis_name(axis) + d = {'copy': copy, 'inplace': inplace} + d[axis] = mapper + return self.rename(**d) else: - msg = ("Using 'rename_axis' to alter labels is deprecated. " - "Use '.rename' instead") - warnings.warn(msg, FutureWarning, stacklevel=2) - axis = self._get_axis_name(axis) - d = {'copy': copy, 'inplace': inplace} - d[axis] = mapper - return self.rename(**d) + # Use new behavior. Means that index and/or columns + # is specified + result = self if inplace else self.copy(deep=copy) + + for axis in lrange(self._AXIS_LEN): + v = axes.get(self._AXIS_NAMES[axis]) + if v is None: + continue + non_mapper = is_scalar(v) or (is_list_like(v) and not + is_dict_like(v)) + if non_mapper: + newnames = v + else: + f = com._get_rename_function(v) + curnames = self._get_axis(axis).names + newnames = [f(name) for name in curnames] + result._set_axis_name(newnames, axis=axis, + inplace=True) + if not inplace: + return result def _set_axis_name(self, name, axis=0, inplace=False): """ diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 0bf2d537f3698..0752c125b75eb 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -538,6 +538,52 @@ def test_rename_axis_warns(self): df['A'].rename_axis(id) assert 'rename' in str(w[0].message) + def test_rename_axis_mapper(self): + # GH 19978 + mi = MultiIndex.from_product([['a', 'b', 'c'], [1, 2]], + names=['ll', 'nn']) + df = DataFrame({'x': [i for i in range(len(mi))], + 'y': [i * 10 for i in range(len(mi))]}, + index=mi) + + # Test for rename of the Index object of columns + result = df.rename_axis('cols', axis=1) + tm.assert_index_equal(result.columns, + Index(['x', 'y'], name='cols')) + + # Test for rename of the Index object of columns using dict + result = result.rename_axis(columns={'cols': 'new'}, axis=1) + tm.assert_index_equal(result.columns, + 
Index(['x', 'y'], name='new')) + + # Test for renaming index using dict + result = df.rename_axis(index={'ll': 'foo'}) + assert result.index.names == ['foo', 'nn'] + + # Test for renaming index using a function + result = df.rename_axis(index=str.upper, axis=0) + assert result.index.names == ['LL', 'NN'] + + # Test for renaming index providing complete list + result = df.rename_axis(index=['foo', 'goo']) + assert result.index.names == ['foo', 'goo'] + + # Test for changing index and columns at same time + sdf = df.reset_index().set_index('nn').drop(columns=['ll', 'y']) + result = sdf.rename_axis(index='foo', columns='meh') + assert result.index.name == 'foo' + assert result.columns.name == 'meh' + + # Test different error cases + with tm.assert_raises_regex(TypeError, 'Must pass'): + df.rename_axis(index='wrong') + + with tm.assert_raises_regex(ValueError, 'Length of names'): + df.rename_axis(index=['wrong']) + + with tm.assert_raises_regex(TypeError, 'bogus'): + df.rename_axis(bogus=None) + def test_rename_multiindex(self): tuples_index = [('foo1', 'bar1'), ('foo2', 'bar2')] diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 9dfb493cb129c..337eb74b3b51a 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1345,8 +1345,8 @@ def test_append_with_strings(self): with catch_warnings(record=True): simplefilter("ignore", FutureWarning) wp = tm.makePanel() - wp2 = wp.rename_axis( - {x: "%s_extra" % x for x in wp.minor_axis}, axis=2) + wp2 = wp.rename( + minor_axis={x: "%s_extra" % x for x in wp.minor_axis}) def check_col(key, name, size): assert getattr(store.get_storer(key) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index e65a2e9f9d4fa..23bf8896409c9 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1553,12 +1553,11 @@ def df(): panel1 = make_panel() panel2 = make_panel() - panel2 = panel2.rename_axis({x: "%s_1" 
% x - for x in panel2.major_axis}, - axis=1) + panel2 = panel2.rename(major_axis={x: "%s_1" % x + for x in panel2.major_axis}) - panel3 = panel2.rename_axis(lambda x: '%s_1' % x, axis=1) - panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2) + panel3 = panel2.rename(major_axis=lambda x: '%s_1' % x) + panel3 = panel3.rename(minor_axis=lambda x: '%s_1' % x) # it works! concat([panel1, panel3], axis=1, verify_integrity=True, sort=sort) diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index be3d0cd6d929c..00e145680c7a6 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -226,6 +226,24 @@ def test_reorder_levels(self): expected = Series(np.arange(6), index=e_idx) tm.assert_series_equal(result, expected) + def test_rename_axis_mapper(self): + # GH 19978 + mi = MultiIndex.from_product([['a', 'b', 'c'], [1, 2]], + names=['ll', 'nn']) + s = Series([i for i in range(len(mi))], index=mi) + + result = s.rename_axis(index={'ll': 'foo'}) + assert result.index.names == ['foo', 'nn'] + + result = s.rename_axis(index=str.upper, axis=0) + assert result.index.names == ['LL', 'NN'] + + result = s.rename_axis(index=['foo', 'goo']) + assert result.index.names == ['foo', 'goo'] + + with tm.assert_raises_regex(TypeError, 'unexpected'): + s.rename_axis(columns='wrong') + def test_rename_axis_inplace(self, datetime_series): # GH 15704 expected = datetime_series.rename_axis('foo') diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 51c779c6a97a3..775fcc2684f42 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2110,16 +2110,16 @@ def test_repr_empty(self): def test_rename(self): mapper = {'ItemA': 'foo', 'ItemB': 'bar', 'ItemC': 'baz'} - renamed = self.panel.rename_axis(mapper, axis=0) + renamed = self.panel.rename(items=mapper) exp = Index(['foo', 'bar', 'baz']) tm.assert_index_equal(renamed.items, exp) - renamed = 
self.panel.rename_axis(str.lower, axis=2) + renamed = self.panel.rename(minor_axis=str.lower) exp = Index(['a', 'b', 'c', 'd']) tm.assert_index_equal(renamed.minor_axis, exp) # don't copy - renamed_nocopy = self.panel.rename_axis(mapper, axis=0, copy=False) + renamed_nocopy = self.panel.rename(items=mapper, copy=False) renamed_nocopy['foo'] = 3. assert (self.panel['ItemA'].values == 3).all()