Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: rename MultiIndex.labels to MultiIndex.codes #23752

Merged
merged 12 commits into from
Dec 5, 2018
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,8 +473,8 @@ def setup(self):
n1 = 400
n2 = 250
index = MultiIndex(levels=[np.arange(n1), tm.makeStringIndex(n2)],
labels=[np.repeat(range(n1), n2).tolist(),
list(range(n2)) * n1],
codes=[np.repeat(range(n1), n2).tolist(),
list(range(n2)) * n1],
names=['lev1', 'lev2'])
arr = np.random.randn(n1 * n2, 3)
arr[::10000, 0] = np.nan
Expand Down
10 changes: 5 additions & 5 deletions asv_bench/benchmarks/join_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,16 @@ class Join(object):
def setup(self, sort):
level1 = tm.makeStringIndex(10).values
level2 = tm.makeStringIndex(1000).values
label1 = np.arange(10).repeat(1000)
label2 = np.tile(np.arange(1000), 10)
codes1 = np.arange(10).repeat(1000)
codes2 = np.tile(np.arange(1000), 10)
index2 = MultiIndex(levels=[level1, level2],
labels=[label1, label2])
codes=[codes1, codes2])
self.df_multi = DataFrame(np.random.randn(len(index2), 4),
index=index2,
columns=['A', 'B', 'C', 'D'])

self.key1 = np.tile(level1.take(label1), 10)
self.key2 = np.tile(level2.take(label2), 10)
self.key1 = np.tile(level1.take(codes1), 10)
self.key2 = np.tile(level2.take(codes2), 10)
self.df = DataFrame({'data1': np.random.randn(100000),
'data2': np.random.randn(100000),
'key1': self.key1,
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/multiindex_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ def setup(self):
levels = [np.arange(n),
tm.makeStringIndex(n).values,
1000 + np.arange(n)]
labels = [np.random.choice(n, (k * n)) for lev in levels]
self.mi = MultiIndex(levels=levels, labels=labels)
codes = [np.random.choice(n, (k * n)) for lev in levels]
self.mi = MultiIndex(levels=levels, codes=codes)

def time_duplicated(self):
self.mi.duplicated()
Expand Down
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ class LevelAlign(object):
def setup(self):
self.index = MultiIndex(
levels=[np.arange(10), np.arange(100), np.arange(100)],
labels=[np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)])
codes=[np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)])
self.df = DataFrame(np.random.randn(len(self.index), 4),
index=self.index)
self.df_level = DataFrame(np.random.randn(100, 4),
Expand Down
16 changes: 8 additions & 8 deletions asv_bench/benchmarks/stat_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ class FrameMultiIndexOps(object):

def setup(self, level, op):
levels = [np.arange(10), np.arange(100), np.arange(100)]
labels = [np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)]
index = pd.MultiIndex(levels=levels, labels=labels)
codes = [np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)]
index = pd.MultiIndex(levels=levels, codes=codes)
df = pd.DataFrame(np.random.randn(len(index), 4), index=index)
self.df_func = getattr(df, op)

Expand Down Expand Up @@ -67,10 +67,10 @@ class SeriesMultiIndexOps(object):

def setup(self, level, op):
levels = [np.arange(10), np.arange(100), np.arange(100)]
labels = [np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)]
index = pd.MultiIndex(levels=levels, labels=labels)
codes = [np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)]
index = pd.MultiIndex(levels=levels, codes=codes)
s = pd.Series(np.random.randn(len(index)), index=index)
self.s_func = getattr(s, op)

Expand Down
7 changes: 6 additions & 1 deletion doc/source/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ analysis.

See the :ref:`cookbook<cookbook.multi_index>` for some advanced strategies.

.. versionchanged:: 0.24.0

:attr:`MultiIndex.labels` has been renamed to :attr:`MultiIndex.codes`
and :meth:`MultiIndex.set_labels` to :meth:`MultiIndex.set_codes`.

Creating a MultiIndex (hierarchical index) object
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down Expand Up @@ -469,7 +474,7 @@ values across a level. For instance:
.. ipython:: python
midx = pd.MultiIndex(levels=[['zero', 'one'], ['x', 'y']],
labels=[[1, 1, 0, 0], [1, 0, 1, 0]])
codes=[[1, 1, 0, 0], [1, 0, 1, 0]])
df = pd.DataFrame(np.random.randn(4, 2), index=midx)
df
df2 = df.mean(level=0)
Expand Down
4 changes: 2 additions & 2 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1712,7 +1712,7 @@ MultiIndex Attributes

MultiIndex.names
MultiIndex.levels
MultiIndex.labels
MultiIndex.codes
MultiIndex.nlevels
MultiIndex.levshape

Expand All @@ -1723,7 +1723,7 @@ MultiIndex Components
:toctree: generated/

MultiIndex.set_levels
MultiIndex.set_labels
MultiIndex.set_codes
MultiIndex.to_hierarchical
MultiIndex.to_flat_index
MultiIndex.to_frame
Expand Down
2 changes: 1 addition & 1 deletion doc/source/dsintro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,7 @@ From DataFrame using ``to_panel`` method
.. ipython:: python
:okwarning:
midx = pd.MultiIndex(levels=[['one', 'two'], ['x','y']], labels=[[1,1,0,0],[1,0,1,0]])
midx = pd.MultiIndex(levels=[['one', 'two'], ['x','y']], codes=[[1,1,0,0],[1,0,1,0]])
df = pd.DataFrame({'A' : [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=midx)
df.to_panel()
Expand Down
6 changes: 3 additions & 3 deletions doc/source/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1571,9 +1571,9 @@ Setting metadata

Indexes are "mostly immutable", but it is possible to set and change their
metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and
``labels``).
``codes``).

You can use the ``rename``, ``set_names``, ``set_levels``, and ``set_labels``
You can use the ``rename``, ``set_names``, ``set_levels``, and ``set_codes``
to set these attributes directly. They default to returning a copy; however,
you can specify ``inplace=True`` to have the data change in place.

Expand All @@ -1588,7 +1588,7 @@ See :ref:`Advanced Indexing <advanced>` for usage of MultiIndexes.
ind.name = "bob"
ind
``set_names``, ``set_levels``, and ``set_labels`` also take an optional
``set_names``, ``set_levels``, and ``set_codes`` also take an optional
``level`` argument

.. ipython:: python
Expand Down
10 changes: 5 additions & 5 deletions doc/source/internals.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,23 +74,23 @@ MultiIndex
~~~~~~~~~~

Internally, the ``MultiIndex`` consists of a few things: the **levels**, the
integer **labels**, and the level **names**:
integer **codes** (until version 0.24 named *labels*), and the level **names**:

.. ipython:: python
index = pd.MultiIndex.from_product([range(3), ['one', 'two']],
names=['first', 'second'])
index
index.levels
index.labels
index.codes
index.names
You can probably guess that the labels determine which unique element is
You can probably guess that the codes determine which unique element is
identified with that location at each layer of the index. It's important to
note that sortedness is determined **solely** from the integer labels and does
note that sortedness is determined **solely** from the integer codes and does
not check (or care) whether the levels themselves are sorted. Fortunately, the
constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but
if you compute the levels and labels yourself, please be careful.
if you compute the levels and codes yourself, please be careful.

Values
~~~~~~
Expand Down
4 changes: 2 additions & 2 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3728,8 +3728,8 @@ storing/selecting from homogeneous index ``DataFrames``.
index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
['one', 'two', 'three']],
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['foo', 'bar'])
df_mi = pd.DataFrame(np.random.randn(10, 3), index=index,
columns=['A', 'B', 'C'])
Expand Down
7 changes: 7 additions & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,13 @@ Other API Changes
Deprecations
~~~~~~~~~~~~

- :attr:`MultiIndex.labels` has been deprecated and replaced by :attr:`MultiIndex.codes`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is valid any longer; you can remove the attr

The functionality is unchanged. The new name better reflects the nature of
these codes and makes the ``MultiIndex`` API more similar to the API for :class:`CategoricalIndex` (:issue:`13443`).
As a consequence, other uses of the name ``labels`` in ``MultiIndex`` have also been deprecated and replaced with ``codes``:
- You should initialize a ``MultiIndex`` instance using a parameter named ``codes`` rather than ``labels``.
- ``MultiIndex.set_labels`` has been deprecated in favor of :meth:`MultiIndex.set_codes`.
- For method :meth:`MultiIndex.copy`, the ``labels`` parameter has been deprecated and replaced by a ``codes`` parameter.
- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`)
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
Expand Down
19 changes: 10 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1839,7 +1839,7 @@ def to_panel(self):
selfsorted = self

major_axis, minor_axis = selfsorted.index.levels
major_labels, minor_labels = selfsorted.index.labels
major_codes, minor_codes = selfsorted.index.codes
shape = len(major_axis), len(minor_axis)

# preserve names, if any
Expand All @@ -1854,8 +1854,8 @@ def to_panel(self):

# create new manager
new_mgr = selfsorted._data.reshape_nd(axes=new_axes,
labels=[major_labels,
minor_labels],
labels=[major_codes,
minor_codes],
shape=shape,
ref_items=selfsorted.columns)

Expand Down Expand Up @@ -3736,8 +3736,8 @@ def drop(self, labels=None, axis=0, index=None, columns=None,
>>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],
... ['speed', 'weight', 'length']],
... labels=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
... [0, 1, 2, 0, 1, 2, 0, 1, 2]])
... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
... [0, 1, 2, 0, 1, 2, 0, 1, 2]])
>>> df = pd.DataFrame(index=midx, columns=['big', 'small'],
... data=[[45, 30], [200, 100], [1.5, 1], [30, 20],
... [250, 150], [1.5, 0.8], [320, 250],
Expand Down Expand Up @@ -4226,7 +4226,7 @@ def _maybe_casted_values(index, labels=None):
if isinstance(self.index, MultiIndex):
names = [n if n is not None else ('level_%d' % i)
for (i, n) in enumerate(self.index.names)]
to_insert = lzip(self.index.levels, self.index.labels)
to_insert = lzip(self.index.levels, self.index.codes)
else:
default = 'index' if 'index' not in self else 'level_0'
names = ([default] if self.index.name is None
Expand Down Expand Up @@ -4594,7 +4594,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
elif isinstance(labels, MultiIndex):
from pandas.core.sorting import lexsort_indexer

indexer = lexsort_indexer(labels._get_labels_for_sorting(),
indexer = lexsort_indexer(labels._get_codes_for_sorting(),
orders=ascending,
na_position=na_position)
else:
Expand Down Expand Up @@ -7147,8 +7147,9 @@ def _count_level(self, level, axis=0, numeric_only=False):
level = count_axis._get_level_number(level)

level_index = count_axis.levels[level]
labels = ensure_int64(count_axis.labels[level])
counts = lib.count_level_2d(mask, labels, len(level_index), axis=0)
level_codes = ensure_int64(count_axis.codes[level])
counts = lib.count_level_2d(mask, level_codes, len(level_index),
axis=0)

result = DataFrame(counts, index=level_index, columns=agg_axis)

Expand Down
10 changes: 5 additions & 5 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1112,7 +1112,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
lab = cut(Series(val), bins, include_lowest=True)
lev = lab.cat.categories
lab = lev.take(lab.cat.codes)
llab = lambda lab, inc: lab[inc]._multiindex.labels[-1]
llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]

if is_interval_dtype(lab):
# TODO: should we do this inside II?
Expand Down Expand Up @@ -1163,7 +1163,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
out, labels[-1] = out[sorter], labels[-1][sorter]

if bins is None:
mi = MultiIndex(levels=levels, labels=labels, names=names,
mi = MultiIndex(levels=levels, codes=labels, names=names,
verify_integrity=False)

if is_integer_dtype(out):
Expand Down Expand Up @@ -1191,10 +1191,10 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
out, left[-1] = out[sorter], left[-1][sorter]

# build the multi-index w/ full levels
labels = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
labels.append(left[-1])
codes = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
codes.append(left[-1])

mi = MultiIndex(levels=levels, labels=labels, names=names,
mi = MultiIndex(levels=levels, codes=codes, names=names,
verify_integrity=False)

if is_integer_dtype(out):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,10 +290,10 @@ def result_index(self):
if not self.compressed and len(self.groupings) == 1:
return self.groupings[0].result_index.rename(self.names[0])

labels = self.recons_labels
codes = self.recons_labels
levels = [ping.result_index for ping in self.groupings]
result = MultiIndex(levels=levels,
labels=labels,
codes=codes,
verify_integrity=False,
names=self.names)
return result
Expand Down
Loading