Skip to content

Commit

Permalink
API: rename MultiIndex.labels to MultiIndex.codes (#23752)
Browse files Browse the repository at this point in the history
  • Loading branch information
topper-123 authored and jreback committed Dec 5, 2018
1 parent 9f2c716 commit aead29b
Show file tree
Hide file tree
Showing 87 changed files with 919 additions and 822 deletions.
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,8 +473,8 @@ def setup(self):
n1 = 400
n2 = 250
index = MultiIndex(levels=[np.arange(n1), tm.makeStringIndex(n2)],
labels=[np.repeat(range(n1), n2).tolist(),
list(range(n2)) * n1],
codes=[np.repeat(range(n1), n2).tolist(),
list(range(n2)) * n1],
names=['lev1', 'lev2'])
arr = np.random.randn(n1 * n2, 3)
arr[::10000, 0] = np.nan
Expand Down
10 changes: 5 additions & 5 deletions asv_bench/benchmarks/join_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,16 @@ class Join(object):
def setup(self, sort):
level1 = tm.makeStringIndex(10).values
level2 = tm.makeStringIndex(1000).values
label1 = np.arange(10).repeat(1000)
label2 = np.tile(np.arange(1000), 10)
codes1 = np.arange(10).repeat(1000)
codes2 = np.tile(np.arange(1000), 10)
index2 = MultiIndex(levels=[level1, level2],
labels=[label1, label2])
codes=[codes1, codes2])
self.df_multi = DataFrame(np.random.randn(len(index2), 4),
index=index2,
columns=['A', 'B', 'C', 'D'])

self.key1 = np.tile(level1.take(label1), 10)
self.key2 = np.tile(level2.take(label2), 10)
self.key1 = np.tile(level1.take(codes1), 10)
self.key2 = np.tile(level2.take(codes2), 10)
self.df = DataFrame({'data1': np.random.randn(100000),
'data2': np.random.randn(100000),
'key1': self.key1,
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/multiindex_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ def setup(self):
levels = [np.arange(n),
tm.makeStringIndex(n).values,
1000 + np.arange(n)]
labels = [np.random.choice(n, (k * n)) for lev in levels]
self.mi = MultiIndex(levels=levels, labels=labels)
codes = [np.random.choice(n, (k * n)) for lev in levels]
self.mi = MultiIndex(levels=levels, codes=codes)

def time_duplicated(self):
self.mi.duplicated()
Expand Down
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ class LevelAlign(object):
def setup(self):
self.index = MultiIndex(
levels=[np.arange(10), np.arange(100), np.arange(100)],
labels=[np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)])
codes=[np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)])
self.df = DataFrame(np.random.randn(len(self.index), 4),
index=self.index)
self.df_level = DataFrame(np.random.randn(100, 4),
Expand Down
16 changes: 8 additions & 8 deletions asv_bench/benchmarks/stat_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ class FrameMultiIndexOps(object):

def setup(self, level, op):
levels = [np.arange(10), np.arange(100), np.arange(100)]
labels = [np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)]
index = pd.MultiIndex(levels=levels, labels=labels)
codes = [np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)]
index = pd.MultiIndex(levels=levels, codes=codes)
df = pd.DataFrame(np.random.randn(len(index), 4), index=index)
self.df_func = getattr(df, op)

Expand Down Expand Up @@ -67,10 +67,10 @@ class SeriesMultiIndexOps(object):

def setup(self, level, op):
levels = [np.arange(10), np.arange(100), np.arange(100)]
labels = [np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)]
index = pd.MultiIndex(levels=levels, labels=labels)
codes = [np.arange(10).repeat(10000),
np.tile(np.arange(100).repeat(100), 10),
np.tile(np.tile(np.arange(100), 100), 10)]
index = pd.MultiIndex(levels=levels, codes=codes)
s = pd.Series(np.random.randn(len(index)), index=index)
self.s_func = getattr(s, op)

Expand Down
7 changes: 6 additions & 1 deletion doc/source/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ analysis.

See the :ref:`cookbook<cookbook.multi_index>` for some advanced strategies.

.. versionchanged:: 0.24.0

:attr:`MultiIndex.labels` has been renamed to :attr:`MultiIndex.codes`
and :meth:`MultiIndex.set_labels` to :meth:`MultiIndex.set_codes`.

Creating a MultiIndex (hierarchical index) object
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down Expand Up @@ -469,7 +474,7 @@ values across a level. For instance:
.. ipython:: python
midx = pd.MultiIndex(levels=[['zero', 'one'], ['x', 'y']],
labels=[[1, 1, 0, 0], [1, 0, 1, 0]])
codes=[[1, 1, 0, 0], [1, 0, 1, 0]])
df = pd.DataFrame(np.random.randn(4, 2), index=midx)
df
df2 = df.mean(level=0)
Expand Down
4 changes: 2 additions & 2 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1712,7 +1712,7 @@ MultiIndex Attributes

MultiIndex.names
MultiIndex.levels
MultiIndex.labels
MultiIndex.codes
MultiIndex.nlevels
MultiIndex.levshape

Expand All @@ -1723,7 +1723,7 @@ MultiIndex Components
:toctree: generated/

MultiIndex.set_levels
MultiIndex.set_labels
MultiIndex.set_codes
MultiIndex.to_hierarchical
MultiIndex.to_flat_index
MultiIndex.to_frame
Expand Down
2 changes: 1 addition & 1 deletion doc/source/dsintro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,7 @@ From DataFrame using ``to_panel`` method
.. ipython:: python
:okwarning:
midx = pd.MultiIndex(levels=[['one', 'two'], ['x','y']], labels=[[1,1,0,0],[1,0,1,0]])
midx = pd.MultiIndex(levels=[['one', 'two'], ['x','y']], codes=[[1,1,0,0],[1,0,1,0]])
df = pd.DataFrame({'A' : [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=midx)
df.to_panel()
Expand Down
6 changes: 3 additions & 3 deletions doc/source/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1571,9 +1571,9 @@ Setting metadata

Indexes are "mostly immutable", but it is possible to set and change their
metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and
``labels``).
``codes``).

You can use the ``rename``, ``set_names``, ``set_levels``, and ``set_labels``
You can use the ``rename``, ``set_names``, ``set_levels``, and ``set_codes``
to set these attributes directly. They default to returning a copy; however,
you can specify ``inplace=True`` to have the data change in place.

Expand All @@ -1588,7 +1588,7 @@ See :ref:`Advanced Indexing <advanced>` for usage of MultiIndexes.
ind.name = "bob"
ind
``set_names``, ``set_levels``, and ``set_labels`` also take an optional
``set_names``, ``set_levels``, and ``set_codes`` also take an optional
``level`` argument

.. ipython:: python
Expand Down
10 changes: 5 additions & 5 deletions doc/source/internals.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,23 +74,23 @@ MultiIndex
~~~~~~~~~~

Internally, the ``MultiIndex`` consists of a few things: the **levels**, the
integer **labels**, and the level **names**:
integer **codes** (until version 0.24 named *labels*), and the level **names**:

.. ipython:: python
index = pd.MultiIndex.from_product([range(3), ['one', 'two']],
names=['first', 'second'])
index
index.levels
index.labels
index.codes
index.names
You can probably guess that the labels determine which unique element is
You can probably guess that the codes determine which unique element is
identified with that location at each layer of the index. It's important to
note that sortedness is determined **solely** from the integer labels and does
note that sortedness is determined **solely** from the integer codes and does
not check (or care) whether the levels themselves are sorted. Fortunately, the
constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but
if you compute the levels and labels yourself, please be careful.
if you compute the levels and codes yourself, please be careful.

Values
~~~~~~
Expand Down
4 changes: 2 additions & 2 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3728,8 +3728,8 @@ storing/selecting from homogeneous index ``DataFrames``.
index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
['one', 'two', 'three']],
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['foo', 'bar'])
df_mi = pd.DataFrame(np.random.randn(10, 3), index=index,
columns=['A', 'B', 'C'])
Expand Down
7 changes: 7 additions & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,13 @@ Other API Changes
Deprecations
~~~~~~~~~~~~

- :attr:`MultiIndex.labels` has been deprecated and replaced by :attr:`MultiIndex.codes`.
The functionality is unchanged. The new name better reflects the nature of
these codes and makes the ``MultiIndex`` API more similar to the API for :class:`CategoricalIndex` (:issue:`13443`).
As a consequence, other uses of the name ``labels`` in ``MultiIndex`` have also been deprecated and replaced with ``codes``:
- You should initialize a ``MultiIndex`` instance using a parameter named ``codes`` rather than ``labels``.
- ``MultiIndex.set_labels`` has been deprecated in favor of :meth:`MultiIndex.set_codes`.
- For method :meth:`MultiIndex.copy`, the ``labels`` parameter has been deprecated and replaced by a ``codes`` parameter.
- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`)
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
Expand Down
19 changes: 10 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1839,7 +1839,7 @@ def to_panel(self):
selfsorted = self

major_axis, minor_axis = selfsorted.index.levels
major_labels, minor_labels = selfsorted.index.labels
major_codes, minor_codes = selfsorted.index.codes
shape = len(major_axis), len(minor_axis)

# preserve names, if any
Expand All @@ -1854,8 +1854,8 @@ def to_panel(self):

# create new manager
new_mgr = selfsorted._data.reshape_nd(axes=new_axes,
labels=[major_labels,
minor_labels],
labels=[major_codes,
minor_codes],
shape=shape,
ref_items=selfsorted.columns)

Expand Down Expand Up @@ -3736,8 +3736,8 @@ def drop(self, labels=None, axis=0, index=None, columns=None,
>>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],
... ['speed', 'weight', 'length']],
... labels=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
... [0, 1, 2, 0, 1, 2, 0, 1, 2]])
... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
... [0, 1, 2, 0, 1, 2, 0, 1, 2]])
>>> df = pd.DataFrame(index=midx, columns=['big', 'small'],
... data=[[45, 30], [200, 100], [1.5, 1], [30, 20],
... [250, 150], [1.5, 0.8], [320, 250],
Expand Down Expand Up @@ -4226,7 +4226,7 @@ def _maybe_casted_values(index, labels=None):
if isinstance(self.index, MultiIndex):
names = [n if n is not None else ('level_%d' % i)
for (i, n) in enumerate(self.index.names)]
to_insert = lzip(self.index.levels, self.index.labels)
to_insert = lzip(self.index.levels, self.index.codes)
else:
default = 'index' if 'index' not in self else 'level_0'
names = ([default] if self.index.name is None
Expand Down Expand Up @@ -4594,7 +4594,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
elif isinstance(labels, MultiIndex):
from pandas.core.sorting import lexsort_indexer

indexer = lexsort_indexer(labels._get_labels_for_sorting(),
indexer = lexsort_indexer(labels._get_codes_for_sorting(),
orders=ascending,
na_position=na_position)
else:
Expand Down Expand Up @@ -7147,8 +7147,9 @@ def _count_level(self, level, axis=0, numeric_only=False):
level = count_axis._get_level_number(level)

level_index = count_axis.levels[level]
labels = ensure_int64(count_axis.labels[level])
counts = lib.count_level_2d(mask, labels, len(level_index), axis=0)
level_codes = ensure_int64(count_axis.codes[level])
counts = lib.count_level_2d(mask, level_codes, len(level_index),
axis=0)

result = DataFrame(counts, index=level_index, columns=agg_axis)

Expand Down
10 changes: 5 additions & 5 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1112,7 +1112,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
lab = cut(Series(val), bins, include_lowest=True)
lev = lab.cat.categories
lab = lev.take(lab.cat.codes)
llab = lambda lab, inc: lab[inc]._multiindex.labels[-1]
llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]

if is_interval_dtype(lab):
# TODO: should we do this inside II?
Expand Down Expand Up @@ -1163,7 +1163,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
out, labels[-1] = out[sorter], labels[-1][sorter]

if bins is None:
mi = MultiIndex(levels=levels, labels=labels, names=names,
mi = MultiIndex(levels=levels, codes=labels, names=names,
verify_integrity=False)

if is_integer_dtype(out):
Expand Down Expand Up @@ -1191,10 +1191,10 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
out, left[-1] = out[sorter], left[-1][sorter]

# build the multi-index w/ full levels
labels = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
labels.append(left[-1])
codes = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
codes.append(left[-1])

mi = MultiIndex(levels=levels, labels=labels, names=names,
mi = MultiIndex(levels=levels, codes=codes, names=names,
verify_integrity=False)

if is_integer_dtype(out):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,10 +290,10 @@ def result_index(self):
if not self.compressed and len(self.groupings) == 1:
return self.groupings[0].result_index.rename(self.names[0])

labels = self.recons_labels
codes = self.recons_labels
levels = [ping.result_index for ping in self.groupings]
result = MultiIndex(levels=levels,
labels=labels,
codes=codes,
verify_integrity=False,
names=self.names)
return result
Expand Down
Loading

0 comments on commit aead29b

Please sign in to comment.