From aead29b745b48af0cac5fc7f677120e9a95049f4 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 5 Dec 2018 19:06:00 +0000 Subject: [PATCH] API: rename MultiIndex.labels to MultiIndex.codes (#23752) --- asv_bench/benchmarks/groupby.py | 4 +- asv_bench/benchmarks/join_merge.py | 10 +- asv_bench/benchmarks/multiindex_object.py | 4 +- asv_bench/benchmarks/reindex.py | 6 +- asv_bench/benchmarks/stat_ops.py | 16 +- doc/source/advanced.rst | 7 +- doc/source/api.rst | 4 +- doc/source/dsintro.rst | 2 +- doc/source/indexing.rst | 6 +- doc/source/internals.rst | 10 +- doc/source/io.rst | 4 +- doc/source/whatsnew/v0.24.0.rst | 7 + pandas/core/frame.py | 19 +- pandas/core/groupby/generic.py | 10 +- pandas/core/groupby/ops.py | 4 +- pandas/core/indexes/base.py | 48 +- pandas/core/indexes/multi.py | 494 ++++++++++-------- pandas/core/panel.py | 30 +- pandas/core/reshape/concat.py | 24 +- pandas/core/reshape/merge.py | 40 +- pandas/core/reshape/reshape.py | 88 ++-- pandas/core/series.py | 14 +- pandas/core/sparse/frame.py | 6 +- pandas/core/util/hashing.py | 2 +- pandas/core/window.py | 2 +- pandas/io/formats/excel.py | 14 +- pandas/io/pytables.py | 16 +- pandas/tests/frame/test_alter_axes.py | 26 +- pandas/tests/frame/test_analytics.py | 6 +- pandas/tests/frame/test_indexing.py | 2 +- pandas/tests/frame/test_reshape.py | 34 +- pandas/tests/groupby/conftest.py | 4 +- pandas/tests/groupby/test_categorical.py | 4 +- pandas/tests/groupby/test_counting.py | 2 +- pandas/tests/groupby/test_function.py | 6 +- pandas/tests/groupby/test_groupby.py | 10 +- pandas/tests/groupby/test_grouping.py | 8 +- pandas/tests/groupby/test_whitelist.py | 8 +- pandas/tests/indexes/multi/conftest.py | 12 +- pandas/tests/indexes/multi/test_analytics.py | 18 +- pandas/tests/indexes/multi/test_astype.py | 2 +- pandas/tests/indexes/multi/test_compat.py | 12 +- .../tests/indexes/multi/test_constructor.py | 75 +-- pandas/tests/indexes/multi/test_contains.py | 2 +- pandas/tests/indexes/multi/test_conversion.py 
| 10 +- pandas/tests/indexes/multi/test_copy.py | 22 +- pandas/tests/indexes/multi/test_drop.py | 4 +- pandas/tests/indexes/multi/test_duplicates.py | 36 +- .../tests/indexes/multi/test_equivalence.py | 16 +- pandas/tests/indexes/multi/test_format.py | 9 +- pandas/tests/indexes/multi/test_get_set.py | 160 +++--- pandas/tests/indexes/multi/test_indexing.py | 20 +- pandas/tests/indexes/multi/test_integrity.py | 32 +- pandas/tests/indexes/multi/test_missing.py | 4 +- pandas/tests/indexes/multi/test_monotonic.py | 24 +- pandas/tests/indexes/multi/test_names.py | 6 +- pandas/tests/indexes/multi/test_sorting.py | 8 +- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/indexing/multiindex/conftest.py | 4 +- .../tests/indexing/multiindex/test_getitem.py | 12 +- pandas/tests/indexing/multiindex/test_loc.py | 6 +- .../tests/indexing/multiindex/test_partial.py | 12 +- .../tests/indexing/multiindex/test_setitem.py | 6 +- .../tests/indexing/multiindex/test_sorted.py | 2 +- pandas/tests/internals/test_internals.py | 4 +- pandas/tests/io/formats/test_to_csv.py | 4 +- pandas/tests/io/formats/test_to_html.py | 4 +- pandas/tests/io/parser/test_header.py | 8 +- pandas/tests/io/parser/test_index_col.py | 2 +- pandas/tests/io/test_excel.py | 6 +- pandas/tests/io/test_feather.py | 16 +- pandas/tests/io/test_html.py | 4 +- pandas/tests/io/test_pytables.py | 12 +- pandas/tests/reshape/merge/test_join.py | 4 +- pandas/tests/reshape/merge/test_multi.py | 8 +- pandas/tests/reshape/test_concat.py | 20 +- pandas/tests/reshape/test_pivot.py | 14 +- pandas/tests/reshape/test_reshape.py | 2 +- pandas/tests/series/indexing/test_indexing.py | 4 +- pandas/tests/series/test_alter_axes.py | 12 +- pandas/tests/series/test_analytics.py | 12 +- pandas/tests/series/test_repr.py | 4 +- pandas/tests/series/test_timeseries.py | 4 +- pandas/tests/test_multilevel.py | 60 +-- pandas/tests/test_panel.py | 16 +- pandas/tests/util/test_hashing.py | 2 +- pandas/util/testing.py | 2 +- 87 files changed, 919 
insertions(+), 822 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index ee5ae69555d16..59e43ee22afde 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -473,8 +473,8 @@ def setup(self): n1 = 400 n2 = 250 index = MultiIndex(levels=[np.arange(n1), tm.makeStringIndex(n2)], - labels=[np.repeat(range(n1), n2).tolist(), - list(range(n2)) * n1], + codes=[np.repeat(range(n1), n2).tolist(), + list(range(n2)) * n1], names=['lev1', 'lev2']) arr = np.random.randn(n1 * n2, 3) arr[::10000, 0] = np.nan diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 84ccc10e8302f..88a59fea375ea 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -115,16 +115,16 @@ class Join(object): def setup(self, sort): level1 = tm.makeStringIndex(10).values level2 = tm.makeStringIndex(1000).values - label1 = np.arange(10).repeat(1000) - label2 = np.tile(np.arange(1000), 10) + codes1 = np.arange(10).repeat(1000) + codes2 = np.tile(np.arange(1000), 10) index2 = MultiIndex(levels=[level1, level2], - labels=[label1, label2]) + codes=[codes1, codes2]) self.df_multi = DataFrame(np.random.randn(len(index2), 4), index=index2, columns=['A', 'B', 'C', 'D']) - self.key1 = np.tile(level1.take(label1), 10) - self.key2 = np.tile(level2.take(label2), 10) + self.key1 = np.tile(level1.take(codes1), 10) + self.key2 = np.tile(level2.take(codes2), 10) self.df = DataFrame({'data1': np.random.randn(100000), 'data2': np.random.randn(100000), 'key1': self.key1, diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py index ff202322dbe84..adc6730dcd946 100644 --- a/asv_bench/benchmarks/multiindex_object.py +++ b/asv_bench/benchmarks/multiindex_object.py @@ -79,8 +79,8 @@ def setup(self): levels = [np.arange(n), tm.makeStringIndex(n).values, 1000 + np.arange(n)] - labels = [np.random.choice(n, (k * n)) for lev in levels] - self.mi = 
MultiIndex(levels=levels, labels=labels) + codes = [np.random.choice(n, (k * n)) for lev in levels] + self.mi = MultiIndex(levels=levels, codes=codes) def time_duplicated(self): self.mi.duplicated() diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 82c61a98e2c34..576dc495eb984 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -71,9 +71,9 @@ class LevelAlign(object): def setup(self): self.index = MultiIndex( levels=[np.arange(10), np.arange(100), np.arange(100)], - labels=[np.arange(10).repeat(10000), - np.tile(np.arange(100).repeat(100), 10), - np.tile(np.tile(np.arange(100), 100), 10)]) + codes=[np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10)]) self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) self.df_level = DataFrame(np.random.randn(100, 4), diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 66ded52ca35b2..500e4d74d4c4f 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -31,10 +31,10 @@ class FrameMultiIndexOps(object): def setup(self, level, op): levels = [np.arange(10), np.arange(100), np.arange(100)] - labels = [np.arange(10).repeat(10000), - np.tile(np.arange(100).repeat(100), 10), - np.tile(np.tile(np.arange(100), 100), 10)] - index = pd.MultiIndex(levels=levels, labels=labels) + codes = [np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10)] + index = pd.MultiIndex(levels=levels, codes=codes) df = pd.DataFrame(np.random.randn(len(index), 4), index=index) self.df_func = getattr(df, op) @@ -67,10 +67,10 @@ class SeriesMultiIndexOps(object): def setup(self, level, op): levels = [np.arange(10), np.arange(100), np.arange(100)] - labels = [np.arange(10).repeat(10000), - np.tile(np.arange(100).repeat(100), 10), - np.tile(np.tile(np.arange(100), 100), 10)] - index = 
pd.MultiIndex(levels=levels, labels=labels) + codes = [np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10)] + index = pd.MultiIndex(levels=levels, codes=codes) s = pd.Series(np.random.randn(len(index)), index=index) self.s_func = getattr(s, op) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 24a1ac7be7d1d..39082ef7a4c69 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -49,6 +49,11 @@ analysis. See the :ref:`cookbook` for some advanced strategies. +.. versionchanged:: 0.24.0 + + :attr:`MultiIndex.labels` has been renamed to :attr:`MultiIndex.codes` + and :attr:`MultiIndex.set_labels` to :attr:`MultiIndex.set_codes`. + Creating a MultiIndex (hierarchical index) object ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -469,7 +474,7 @@ values across a level. For instance: .. ipython:: python midx = pd.MultiIndex(levels=[['zero', 'one'], ['x', 'y']], - labels=[[1, 1, 0, 0], [1, 0, 1, 0]]) + codes=[[1, 1, 0, 0], [1, 0, 1, 0]]) df = pd.DataFrame(np.random.randn(4, 2), index=midx) df df2 = df.mean(level=0) diff --git a/doc/source/api.rst b/doc/source/api.rst index 82ae58acc4974..1a23587d2ebb5 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1712,7 +1712,7 @@ MultiIndex Attributes MultiIndex.names MultiIndex.levels - MultiIndex.labels + MultiIndex.codes MultiIndex.nlevels MultiIndex.levshape @@ -1723,7 +1723,7 @@ MultiIndex Components :toctree: generated/ MultiIndex.set_levels - MultiIndex.set_labels + MultiIndex.set_codes MultiIndex.to_hierarchical MultiIndex.to_flat_index MultiIndex.to_frame diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 6195212873e75..968b30d7e9e2b 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -961,7 +961,7 @@ From DataFrame using ``to_panel`` method .. 
ipython:: python :okwarning: - midx = pd.MultiIndex(levels=[['one', 'two'], ['x','y']], labels=[[1,1,0,0],[1,0,1,0]]) + midx = pd.MultiIndex(levels=[['one', 'two'], ['x','y']], codes=[[1,1,0,0],[1,0,1,0]]) df = pd.DataFrame({'A' : [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=midx) df.to_panel() diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index dc0c6dd027b3c..6ad9c573249a3 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -1571,9 +1571,9 @@ Setting metadata Indexes are "mostly immutable", but it is possible to set and change their metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and -``labels``). +``codes``). -You can use the ``rename``, ``set_names``, ``set_levels``, and ``set_labels`` +You can use the ``rename``, ``set_names``, ``set_levels``, and ``set_codes`` to set these attributes directly. They default to returning a copy; however, you can specify ``inplace=True`` to have the data change in place. @@ -1588,7 +1588,7 @@ See :ref:`Advanced Indexing ` for usage of MultiIndexes. ind.name = "bob" ind -``set_names``, ``set_levels``, and ``set_labels`` also take an optional +``set_names``, ``set_levels``, and ``set_codes`` also take an optional `level`` argument .. ipython:: python diff --git a/doc/source/internals.rst b/doc/source/internals.rst index fdf18aa47416b..c39dafa88db92 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -74,7 +74,7 @@ MultiIndex ~~~~~~~~~~ Internally, the ``MultiIndex`` consists of a few things: the **levels**, the -integer **labels**, and the level **names**: +integer **codes** (until version 0.24 named *labels*), and the level **names**: .. 
ipython:: python @@ -82,15 +82,15 @@ integer **labels**, and the level **names**: names=['first', 'second']) index index.levels - index.labels + index.codes index.names -You can probably guess that the labels determine which unique element is +You can probably guess that the codes determine which unique element is identified with that location at each layer of the index. It's important to -note that sortedness is determined **solely** from the integer labels and does +note that sortedness is determined **solely** from the integer codes and does not check (or care) whether the levels themselves are sorted. Fortunately, the constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but -if you compute the levels and labels yourself, please be careful. +if you compute the levels and codes yourself, please be careful. Values ~~~~~~ diff --git a/doc/source/io.rst b/doc/source/io.rst index fbd238586c776..313c4d723d079 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3728,8 +3728,8 @@ storing/selecting from homogeneous index ``DataFrames``. index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo', 'bar']) df_mi = pd.DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index eab5956735f12..090127f50c6c2 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1100,6 +1100,13 @@ Other API Changes Deprecations ~~~~~~~~~~~~ +- :attr:`MultiIndex.labels` has been deprecated and replaced by :attr:`MultiIndex.codes`. + The functionality is unchanged. The new name better reflects the natures of + these codes and makes the ``MultiIndex`` API more similar to the API for :class:`CategoricalIndex`(:issue:`13443`). 
+ As a consequence, other uses of the name ``labels`` in ``MultiIndex`` have also been deprecated and replaced with ``codes``: + - You should initialize a ``MultiIndex`` instance using a parameter named ``codes`` rather than ``labels``. + - ``MultiIndex.set_labels`` has been deprecated in favor of :meth:`MultiIndex.set_codes`. + - For method :meth:`MultiIndex.copy`, the ``labels`` parameter has been deprecated and replaced by a ``codes`` parameter. - :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`) - :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) - :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2c1fa5ef4439e..9d27d17014a56 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1839,7 +1839,7 @@ def to_panel(self): selfsorted = self major_axis, minor_axis = selfsorted.index.levels - major_labels, minor_labels = selfsorted.index.labels + major_codes, minor_codes = selfsorted.index.codes shape = len(major_axis), len(minor_axis) # preserve names, if any @@ -1854,8 +1854,8 @@ def to_panel(self): # create new manager new_mgr = selfsorted._data.reshape_nd(axes=new_axes, - labels=[major_labels, - minor_labels], + labels=[major_codes, + minor_codes], shape=shape, ref_items=selfsorted.columns) @@ -3736,8 +3736,8 @@ def drop(self, labels=None, axis=0, index=None, columns=None, >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], ... ['speed', 'weight', 'length']], - ... labels=[[0, 0, 0, 1, 1, 1, 2, 2, 2], - ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) + ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], + ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) >>> df = pd.DataFrame(index=midx, columns=['big', 'small'], ... 
data=[[45, 30], [200, 100], [1.5, 1], [30, 20], ... [250, 150], [1.5, 0.8], [320, 250], @@ -4226,7 +4226,7 @@ def _maybe_casted_values(index, labels=None): if isinstance(self.index, MultiIndex): names = [n if n is not None else ('level_%d' % i) for (i, n) in enumerate(self.index.names)] - to_insert = lzip(self.index.levels, self.index.labels) + to_insert = lzip(self.index.levels, self.index.codes) else: default = 'index' if 'index' not in self else 'level_0' names = ([default] if self.index.name is None @@ -4594,7 +4594,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, elif isinstance(labels, MultiIndex): from pandas.core.sorting import lexsort_indexer - indexer = lexsort_indexer(labels._get_labels_for_sorting(), + indexer = lexsort_indexer(labels._get_codes_for_sorting(), orders=ascending, na_position=na_position) else: @@ -7147,8 +7147,9 @@ def _count_level(self, level, axis=0, numeric_only=False): level = count_axis._get_level_number(level) level_index = count_axis.levels[level] - labels = ensure_int64(count_axis.labels[level]) - counts = lib.count_level_2d(mask, labels, len(level_index), axis=0) + level_codes = ensure_int64(count_axis.codes[level]) + counts = lib.count_level_2d(mask, level_codes, len(level_index), + axis=0) result = DataFrame(counts, index=level_index, columns=agg_axis) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a17e2ce7f1ef5..26e437355fa8b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1112,7 +1112,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, lab = cut(Series(val), bins, include_lowest=True) lev = lab.cat.categories lab = lev.take(lab.cat.codes) - llab = lambda lab, inc: lab[inc]._multiindex.labels[-1] + llab = lambda lab, inc: lab[inc]._multiindex.codes[-1] if is_interval_dtype(lab): # TODO: should we do this inside II? 
@@ -1163,7 +1163,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, out, labels[-1] = out[sorter], labels[-1][sorter] if bins is None: - mi = MultiIndex(levels=levels, labels=labels, names=names, + mi = MultiIndex(levels=levels, codes=labels, names=names, verify_integrity=False) if is_integer_dtype(out): @@ -1191,10 +1191,10 @@ def value_counts(self, normalize=False, sort=True, ascending=False, out, left[-1] = out[sorter], left[-1][sorter] # build the multi-index w/ full levels - labels = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1])) - labels.append(left[-1]) + codes = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1])) + codes.append(left[-1]) - mi = MultiIndex(levels=levels, labels=labels, names=names, + mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False) if is_integer_dtype(out): diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 8455c03953ad1..87f48d5a40554 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -290,10 +290,10 @@ def result_index(self): if not self.compressed and len(self.groupings) == 1: return self.groupings[0].result_index.rename(self.names[0]) - labels = self.recons_labels + codes = self.recons_labels levels = [ping.result_index for ping in self.groupings] result = MultiIndex(levels=levels, - labels=labels, + codes=codes, verify_integrity=False, names=self.names) return result diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a5b8e22070923..88510e84a29a5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1521,19 +1521,19 @@ def droplevel(self, level=0): # The two checks above guarantee that here self is a MultiIndex new_levels = list(self.levels) - new_labels = list(self.labels) + new_codes = list(self.codes) new_names = list(self.names) for i in levnums: new_levels.pop(i) - new_labels.pop(i) + new_codes.pop(i) new_names.pop(i) if len(new_levels) == 1: # set nan if 
needed - mask = new_labels[0] == -1 - result = new_levels[0].take(new_labels[0]) + mask = new_codes[0] == -1 + result = new_levels[0].take(new_codes[0]) if mask.any(): result = result.putmask(mask, np.nan) @@ -1541,7 +1541,7 @@ def droplevel(self, level=0): return result else: from .multi import MultiIndex - return MultiIndex(levels=new_levels, labels=new_labels, + return MultiIndex(levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False) _index_shared_docs['_get_grouper_for_level'] = """ @@ -3299,14 +3299,14 @@ def _join_multi(self, other, how, return_indexers=True): # common levels, ldrop_names, rdrop_names dropped_names = ldrop_names + rdrop_names - levels, labels, names = ( + levels, codes, names = ( _restore_dropped_levels_multijoin(self, other, dropped_names, join_idx, lidx, ridx)) # Re-create the multi-index - multi_join_idx = MultiIndex(levels=levels, labels=labels, + multi_join_idx = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False) multi_join_idx = multi_join_idx.remove_unused_levels() @@ -3417,7 +3417,7 @@ def _get_leaf_sorter(labels): left_indexer = None join_index = left else: # sort the leaves - left_indexer = _get_leaf_sorter(left.labels[:level + 1]) + left_indexer = _get_leaf_sorter(left.codes[:level + 1]) join_index = left[left_indexer] else: @@ -3425,55 +3425,55 @@ def _get_leaf_sorter(labels): rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level)) - new_lev_labels = algos.take_nd(rev_indexer, left.labels[level], - allow_fill=False) + new_lev_codes = algos.take_nd(rev_indexer, left.codes[level], + allow_fill=False) - new_labels = list(left.labels) - new_labels[level] = new_lev_labels + new_codes = list(left.codes) + new_codes[level] = new_lev_codes new_levels = list(left.levels) new_levels[level] = new_level if keep_order: # just drop missing values. o.w. 
keep order left_indexer = np.arange(len(left), dtype=np.intp) - mask = new_lev_labels != -1 + mask = new_lev_codes != -1 if not mask.all(): - new_labels = [lab[mask] for lab in new_labels] + new_codes = [lab[mask] for lab in new_codes] left_indexer = left_indexer[mask] else: # tie out the order with other if level == 0: # outer most level, take the fast route - ngroups = 1 + new_lev_labels.max() + ngroups = 1 + new_lev_codes.max() left_indexer, counts = libalgos.groupsort_indexer( - new_lev_labels, ngroups) + new_lev_codes, ngroups) # missing values are placed first; drop them! left_indexer = left_indexer[counts[0]:] - new_labels = [lab[left_indexer] for lab in new_labels] + new_codes = [lab[left_indexer] for lab in new_codes] else: # sort the leaves - mask = new_lev_labels != -1 + mask = new_lev_codes != -1 mask_all = mask.all() if not mask_all: - new_labels = [lab[mask] for lab in new_labels] + new_codes = [lab[mask] for lab in new_codes] - left_indexer = _get_leaf_sorter(new_labels[:level + 1]) - new_labels = [lab[left_indexer] for lab in new_labels] + left_indexer = _get_leaf_sorter(new_codes[:level + 1]) + new_codes = [lab[left_indexer] for lab in new_codes] # left_indexers are w.r.t masked frame. # reverse to original frame! 
if not mask_all: left_indexer = mask.nonzero()[0][left_indexer] - join_index = MultiIndex(levels=new_levels, labels=new_labels, + join_index = MultiIndex(levels=new_levels, codes=new_codes, names=left.names, verify_integrity=False) if right_lev_indexer is not None: right_indexer = algos.take_nd(right_lev_indexer, - join_index.labels[level], + join_index.codes[level], allow_fill=False) else: - right_indexer = join_index.labels[level] + right_indexer = join_index.codes[level] if flip_order: left_indexer, right_indexer = right_indexer, left_indexer diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4e5894916bd44..5e26a3c6c439e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -11,7 +11,7 @@ from pandas.compat import lrange, lzip, map, range, zip from pandas.compat.numpy import function as nv from pandas.errors import PerformanceWarning, UnsortedIndexError -from pandas.util._decorators import Appender, cache_readonly +from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_categorical_dtype, is_hashable, @@ -126,8 +126,15 @@ class MultiIndex(Index): ---------- levels : sequence of arrays The unique labels for each level + codes : sequence of arrays + Integers for each level designating which label at each location + + .. versionadded:: 0.24.0 labels : sequence of arrays Integers for each level designating which label at each location + + .. 
deprecated:: 0.24.0 + Use ``codes`` instead sortorder : optional int Level of sortedness (must be lexicographically sorted by that level) @@ -136,7 +143,7 @@ class MultiIndex(Index): copy : boolean, default False Copy the meta-data verify_integrity : boolean, default True - Check that the levels/labels are consistent and valid + Check that the levels/codes are consistent and valid Examples --------- @@ -170,7 +177,7 @@ class MultiIndex(Index): ---------- names levels - labels + codes nlevels levshape @@ -180,7 +187,7 @@ class MultiIndex(Index): from_tuples from_product set_levels - set_labels + set_codes to_frame to_flat_index is_lexsorted @@ -195,32 +202,33 @@ class MultiIndex(Index): _typ = 'multiindex' _names = FrozenList() _levels = FrozenList() - _labels = FrozenList() + _codes = FrozenList() _comparables = ['names'] rename = Index.set_names # -------------------------------------------------------------------- # Constructors - def __new__(cls, levels=None, labels=None, sortorder=None, names=None, + @deprecate_kwarg(old_arg_name='labels', new_arg_name='codes') + def __new__(cls, levels=None, codes=None, sortorder=None, names=None, dtype=None, copy=False, name=None, verify_integrity=True, _set_identity=True): # compat with Index if name is not None: names = name - if levels is None or labels is None: - raise TypeError("Must pass both levels and labels") - if len(levels) != len(labels): - raise ValueError('Length of levels and labels must be the same.') + if levels is None or codes is None: + raise TypeError("Must pass both levels and codes") + if len(levels) != len(codes): + raise ValueError('Length of levels and codes must be the same.') if len(levels) == 0: - raise ValueError('Must pass non-zero number of levels/labels') + raise ValueError('Must pass non-zero number of levels/codes') result = object.__new__(MultiIndex) - # we've already validated levels and labels, so shortcut here + # we've already validated levels and codes, so shortcut here 
result._set_levels(levels, copy=copy, validate=False) - result._set_labels(labels, copy=copy, validate=False) + result._set_codes(codes, copy=copy, validate=False) if names is not None: # handles name validation @@ -237,39 +245,39 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, result._reset_identity() return result - def _verify_integrity(self, labels=None, levels=None): + def _verify_integrity(self, codes=None, levels=None): """ Parameters ---------- - labels : optional list - Labels to check for validity. Defaults to current labels. + codes : optional list + Codes to check for validity. Defaults to current codes. levels : optional list Levels to check for validity. Defaults to current levels. Raises ------ ValueError - If length of levels and labels don't match, if any label would - exceed level bounds, or there are any duplicate levels. + If length of levels and codes don't match, if the codes for any + level would exceed level bounds, or there are any duplicate levels. """ # NOTE: Currently does not check, among other things, that cached # nlevels matches nor that sortorder matches actually sortorder. - labels = labels or self.labels + codes = codes or self.codes levels = levels or self.levels - if len(levels) != len(labels): - raise ValueError("Length of levels and labels must match. NOTE:" + if len(levels) != len(codes): + raise ValueError("Length of levels and codes must match. 
NOTE:" " this index is in an inconsistent state.") - label_length = len(self.labels[0]) - for i, (level, label) in enumerate(zip(levels, labels)): - if len(label) != label_length: - raise ValueError("Unequal label lengths: %s" % - ([len(lab) for lab in labels])) - if len(label) and label.max() >= len(level): - raise ValueError("On level %d, label max (%d) >= length of" + codes_length = len(self.codes[0]) + for i, (level, level_codes) in enumerate(zip(levels, codes)): + if len(level_codes) != codes_length: + raise ValueError("Unequal code lengths: %s" % + ([len(code_) for code_ in codes])) + if len(level_codes) and level_codes.max() >= len(level): + raise ValueError("On level %d, code max (%d) >= length of" " level (%d). NOTE: this index is in an" - " inconsistent state" % (i, label.max(), + " inconsistent state" % (i, level_codes.max(), len(level))) if not level.is_unique: raise ValueError("Level values must be unique: {values} on " @@ -319,11 +327,11 @@ def from_arrays(cls, arrays, sortorder=None, names=None): from pandas.core.arrays.categorical import _factorize_from_iterables - labels, levels = _factorize_from_iterables(arrays) + codes, levels = _factorize_from_iterables(arrays) if names is None: names = [getattr(arr, "name", None) for arr in arrays] - return MultiIndex(levels=levels, labels=labels, sortorder=sortorder, + return MultiIndex(levels=levels, codes=codes, sortorder=sortorder, names=names, verify_integrity=False) @classmethod @@ -419,9 +427,9 @@ def from_product(cls, iterables, sortorder=None, names=None): elif is_iterator(iterables): iterables = list(iterables) - labels, levels = _factorize_from_iterables(iterables) - labels = cartesian_product(labels) - return MultiIndex(levels, labels, sortorder=sortorder, names=names) + codes, levels = _factorize_from_iterables(iterables) + codes = cartesian_product(codes) + return MultiIndex(levels, codes, sortorder=sortorder, names=names) # -------------------------------------------------------------------- @@ 
-519,7 +527,7 @@ def set_levels(self, levels, level=None, inplace=False, inplace : bool if True, mutates in place verify_integrity : bool (default True) - if True, checks that levels and labels are compatible + if True, checks that levels and codes are compatible Returns ------- @@ -571,54 +579,74 @@ def set_levels(self, levels, level=None, inplace=False, if not inplace: return idx + @property + def codes(self): + return self._codes + @property def labels(self): - return self._labels + warnings.warn((".labels was deprecated in version 0.24.0. " + "Use .codes instead."), + FutureWarning, stacklevel=2) + return self.codes - def _set_labels(self, labels, level=None, copy=False, validate=True, - verify_integrity=False): + def _set_codes(self, codes, level=None, copy=False, validate=True, + verify_integrity=False): - if validate and level is None and len(labels) != self.nlevels: - raise ValueError("Length of labels must match number of levels") - if validate and level is not None and len(labels) != len(level): - raise ValueError('Length of labels must match length of levels.') + if validate and level is None and len(codes) != self.nlevels: + raise ValueError("Length of codes must match number of levels") + if validate and level is not None and len(codes) != len(level): + raise ValueError('Length of codes must match length of levels.') if level is None: - new_labels = FrozenList( - _ensure_frozen(lab, lev, copy=copy)._shallow_copy() - for lev, lab in zip(self.levels, labels)) + new_codes = FrozenList( + _ensure_frozen(level_codes, lev, copy=copy)._shallow_copy() + for lev, level_codes in zip(self.levels, codes)) else: level = [self._get_level_number(l) for l in level] - new_labels = list(self._labels) - for lev_idx, lab in zip(level, labels): + new_codes = list(self._codes) + for lev_idx, level_codes in zip(level, codes): lev = self.levels[lev_idx] - new_labels[lev_idx] = _ensure_frozen( - lab, lev, copy=copy)._shallow_copy() - new_labels = FrozenList(new_labels) + 
new_codes[lev_idx] = _ensure_frozen( + level_codes, lev, copy=copy)._shallow_copy() + new_codes = FrozenList(new_codes) if verify_integrity: - self._verify_integrity(labels=new_labels) + self._verify_integrity(codes=new_codes) - self._labels = new_labels + self._codes = new_codes self._tuples = None self._reset_cache() def set_labels(self, labels, level=None, inplace=False, verify_integrity=True): + warnings.warn((".set_labels was deprecated in version 0.24.0. " + "Use .set_codes instead."), + FutureWarning, stacklevel=2) + return self.set_codes(codes=labels, level=level, inplace=inplace, + verify_integrity=verify_integrity) + + @deprecate_kwarg(old_arg_name='labels', new_arg_name='codes') + def set_codes(self, codes, level=None, inplace=False, + verify_integrity=True): """ - Set new labels on MultiIndex. Defaults to returning + Set new codes on MultiIndex. Defaults to returning new index. + .. versionadded:: 0.24.0 + + New name for deprecated method `set_labels`. + Parameters ---------- - labels : sequence or list of sequence - new labels to apply + codes : sequence or list of sequence + new codes to apply level : int, level name, or sequence of int/level names (default None) level(s) to set (None for all levels) inplace : bool if True, mutates in place verify_integrity : bool (default True) - if True, checks that levels and labels are compatible + if True, checks that levels and codes are compatible Returns ------- @@ -629,47 +657,48 @@ def set_labels(self, labels, level=None, inplace=False, >>> idx = pd.MultiIndex.from_tuples([(1, u'one'), (1, u'two'), (2, u'one'), (2, u'two')], names=['foo', 'bar']) - >>> idx.set_labels([[1,0,1,0], [0,0,1,1]]) + >>> idx.set_codes([[1,0,1,0], [0,0,1,1]]) MultiIndex(levels=[[1, 2], [u'one', u'two']], labels=[[1, 0, 1, 0], [0, 0, 1, 1]], names=[u'foo', u'bar']) - >>> idx.set_labels([1,0,1,0], level=0) + >>> idx.set_codes([1,0,1,0], level=0) MultiIndex(levels=[[1, 2], [u'one', u'two']], labels=[[1, 0, 1, 0], [0, 1, 0, 1]], 
names=[u'foo', u'bar']) - >>> idx.set_labels([0,0,1,1], level='bar') + >>> idx.set_codes([0,0,1,1], level='bar') MultiIndex(levels=[[1, 2], [u'one', u'two']], labels=[[0, 0, 1, 1], [0, 0, 1, 1]], names=[u'foo', u'bar']) - >>> idx.set_labels([[1,0,1,0], [0,0,1,1]], level=[0,1]) + >>> idx.set_codes([[1,0,1,0], [0,0,1,1]], level=[0,1]) MultiIndex(levels=[[1, 2], [u'one', u'two']], labels=[[1, 0, 1, 0], [0, 0, 1, 1]], names=[u'foo', u'bar']) """ if level is not None and not is_list_like(level): - if not is_list_like(labels): - raise TypeError("Labels must be list-like") - if is_list_like(labels[0]): - raise TypeError("Labels must be list-like") + if not is_list_like(codes): + raise TypeError("Codes must be list-like") + if is_list_like(codes[0]): + raise TypeError("Codes must be list-like") level = [level] - labels = [labels] + codes = [codes] elif level is None or is_list_like(level): - if not is_list_like(labels) or not is_list_like(labels[0]): - raise TypeError("Labels must be list of lists-like") + if not is_list_like(codes) or not is_list_like(codes[0]): + raise TypeError("Codes must be list of lists-like") if inplace: idx = self else: idx = self._shallow_copy() idx._reset_identity() - idx._set_labels(labels, level=level, verify_integrity=verify_integrity) + idx._set_codes(codes, level=level, verify_integrity=verify_integrity) if not inplace: return idx - def copy(self, names=None, dtype=None, levels=None, labels=None, + @deprecate_kwarg(old_arg_name='labels', new_arg_name='codes') + def copy(self, names=None, dtype=None, levels=None, codes=None, deep=False, _set_identity=False, **kwargs): """ - Make a copy of this object. Names, dtype, levels and labels can be + Make a copy of this object. Names, dtype, levels and codes can be passed and will be set on new copy. 
Parameters @@ -677,7 +706,7 @@ def copy(self, names=None, dtype=None, levels=None, labels=None, names : sequence, optional dtype : numpy dtype or pandas type, optional levels : sequence, optional - labels : sequence, optional + codes : sequence, optional Returns ------- @@ -696,14 +725,14 @@ def copy(self, names=None, dtype=None, levels=None, labels=None, from copy import deepcopy if levels is None: levels = deepcopy(self.levels) - if labels is None: - labels = deepcopy(self.labels) + if codes is None: + codes = deepcopy(self.codes) else: if levels is None: levels = self.levels - if labels is None: - labels = self.labels - return MultiIndex(levels=levels, labels=labels, names=names, + if codes is None: + codes = self.codes + return MultiIndex(levels=levels, codes=codes, names=names, sortorder=self.sortorder, verify_integrity=False, _set_identity=_set_identity) @@ -722,7 +751,7 @@ def _shallow_copy_with_infer(self, values, **kwargs): # Therefore, an empty MultiIndex is returned GH13490 if len(values) == 0: return MultiIndex(levels=[[] for _ in range(self.nlevels)], - labels=[[] for _ in range(self.nlevels)], + codes=[[] for _ in range(self.nlevels)], **kwargs) return self._shallow_copy(values, **kwargs) @@ -783,7 +812,7 @@ def _nbytes(self, deep=False): objsize = 24 level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels) - label_nbytes = sum(i.nbytes for i in self.labels) + label_nbytes = sum(i.nbytes for i in self.codes) names_nbytes = sum(getsizeof(i, objsize) for i in self.names) result = level_nbytes + label_nbytes + names_nbytes @@ -801,7 +830,7 @@ def _format_attrs(self): attrs = [ ('levels', ibase.default_pprint(self._levels, max_seq_items=False)), - ('labels', ibase.default_pprint(self._labels, + ('labels', ibase.default_pprint(self._codes, max_seq_items=False))] if com._any_not_none(*self.names): attrs.append(('names', ibase.default_pprint(self.names))) @@ -818,26 +847,26 @@ def _format_data(self, name=None): def _format_native_types(self, 
na_rep='nan', **kwargs): new_levels = [] - new_labels = [] + new_codes = [] # go through the levels and format them - for level, label in zip(self.levels, self.labels): + for level, level_codes in zip(self.levels, self.codes): level = level._format_native_types(na_rep=na_rep, **kwargs) # add nan values, if there are any - mask = (label == -1) + mask = (level_codes == -1) if mask.any(): nan_index = len(level) level = np.append(level, na_rep) - label = label.values() - label[mask] = nan_index + level_codes = level_codes.values() + level_codes[mask] = nan_index new_levels.append(level) - new_labels.append(label) + new_codes.append(level_codes) if len(new_levels) == 1: return Index(new_levels[0])._format_native_types() else: # reconstruct the multi-index - mi = MultiIndex(levels=new_levels, labels=new_labels, + mi = MultiIndex(levels=new_levels, codes=new_codes, names=self.names, sortorder=self.sortorder, verify_integrity=False) return mi.values @@ -848,15 +877,15 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, return [] stringified_levels = [] - for lev, lab in zip(self.levels, self.labels): + for lev, level_codes in zip(self.levels, self.codes): na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type) if len(lev) > 0: - formatted = lev.take(lab).format(formatter=formatter) + formatted = lev.take(level_codes).format(formatter=formatter) # we have some NA - mask = lab == -1 + mask = level_codes == -1 if mask.any(): formatted = np.array(formatted, dtype=object) formatted[mask] = na @@ -866,7 +895,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, # weird all NA case formatted = [pprint_thing(na if isna(x) else x, escape_chars=('\t', '\r', '\n')) - for x in algos.take_1d(lev._values, lab)] + for x in algos.take_1d(lev._values, level_codes)] stringified_levels.append(formatted) result_levels = [] @@ -905,7 +934,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, # 
-------------------------------------------------------------------- def __len__(self): - return len(self.labels[0]) + return len(self.codes[0]) def _get_names(self): return FrozenList(level.name for level in self.levels) @@ -967,7 +996,7 @@ def _set_names(self, names, level=None, validate=True): @Appender(_index_shared_docs['_get_grouper_for_level']) def _get_grouper_for_level(self, mapper, level): - indexer = self.labels[level] + indexer = self.codes[level] level_index = self.levels[level] if mapper is not None: @@ -976,25 +1005,24 @@ def _get_grouper_for_level(self, mapper, level): grouper = level_values.map(mapper) return grouper, None, None - labels, uniques = algos.factorize(indexer, sort=True) + codes, uniques = algos.factorize(indexer, sort=True) if len(uniques) > 0 and uniques[0] == -1: # Handle NAs mask = indexer != -1 - ok_labels, uniques = algos.factorize(indexer[mask], - sort=True) + ok_codes, uniques = algos.factorize(indexer[mask], sort=True) - labels = np.empty(len(indexer), dtype=indexer.dtype) - labels[mask] = ok_labels - labels[~mask] = -1 + codes = np.empty(len(indexer), dtype=indexer.dtype) + codes[mask] = ok_codes + codes[~mask] = -1 if len(uniques) < len(level_index): # Remove unobserved levels from level_index level_index = level_index.take(uniques) - grouper = level_index.take(labels) + grouper = level_index.take(codes) - return grouper, labels, level_index + return grouper, codes, level_index @property def _constructor(self): @@ -1048,8 +1076,8 @@ def _engine(self): # Check the total number of bits needed for our representation: if lev_bits[0] > 64: # The levels would overflow a 64 bit uint - use Python integers: - return MultiIndexPyIntEngine(self.levels, self.labels, offsets) - return MultiIndexUIntEngine(self.levels, self.labels, offsets) + return MultiIndexPyIntEngine(self.levels, self.codes, offsets) + return MultiIndexUIntEngine(self.levels, self.codes, offsets) @property def values(self): @@ -1160,7 +1188,7 @@ def duplicated(self, 
keep='first'): from pandas._libs.hashtable import duplicated_int64 shape = map(len, self.levels) - ids = get_group_index(self.labels, shape, sort=False, xnull=False) + ids = get_group_index(self.codes, shape, sort=False, xnull=False) return duplicated_int64(ids, keep) @@ -1172,7 +1200,7 @@ def fillna(self, value=None, downcast=None): @Appender(_index_shared_docs['dropna']) def dropna(self, how='any'): - nans = [label == -1 for label in self.labels] + nans = [level_codes == -1 for level_codes in self.codes] if how == 'any': indexer = np.any(nans, axis=0) elif how == 'all': @@ -1180,8 +1208,8 @@ def dropna(self, how='any'): else: raise ValueError("invalid how option: {0}".format(how)) - new_labels = [label[~indexer] for label in self.labels] - return self.copy(labels=new_labels, deep=True) + new_codes = [level_codes[~indexer] for level_codes in self.codes] + return self.copy(codes=new_codes, deep=True) def get_value(self, series, key): # somewhat broken encapsulation @@ -1262,10 +1290,10 @@ def _get_level_values(self, level, unique=False): """ values = self.levels[level] - labels = self.labels[level] + level_codes = self.codes[level] if unique: - labels = algos.unique(labels) - filled = algos.take_1d(values._values, labels, + level_codes = algos.unique(level_codes) + filled = algos.take_1d(values._values, level_codes, fill_value=values._na_value) values = values._shallow_copy(filled) return values @@ -1401,14 +1429,15 @@ def to_hierarchical(self, n_repeat, n_shuffle=1): [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) """ levels = self.levels - labels = [np.repeat(x, n_repeat) for x in self.labels] - # Assumes that each label is divisible by n_shuffle - labels = [x.reshape(n_shuffle, -1).ravel(order='F') for x in labels] + codes = [np.repeat(level_codes, n_repeat) for + level_codes in self.codes] + # Assumes that each level_codes is divisible by n_shuffle + codes = [x.reshape(n_shuffle, -1).ravel(order='F') for x in codes] names = self.names warnings.warn("Method 
.to_hierarchical is deprecated and will " "be removed in a future version", FutureWarning, stacklevel=2) - return MultiIndex(levels=levels, labels=labels, names=names) + return MultiIndex(levels=levels, codes=codes, names=names) def to_flat_index(self): """ @@ -1444,7 +1473,7 @@ def is_all_dates(self): def is_lexsorted(self): """ - Return True if the labels are lexicographically sorted + Return True if the codes are lexicographically sorted """ return self.lexsort_depth == self.nlevels @@ -1456,9 +1485,9 @@ def lexsort_depth(self): else: return 0 - int64_labels = [ensure_int64(lab) for lab in self.labels] + int64_codes = [ensure_int64(level_codes) for level_codes in self.codes] for k in range(self.nlevels, 0, -1): - if libalgos.is_lexsorted(int64_labels[:k]): + if libalgos.is_lexsorted(int64_codes[:k]): return k return 0 @@ -1485,7 +1514,7 @@ def _sort_levels_monotonic(self): -------- >>> i = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) + codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) >>> i MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) @@ -1500,9 +1529,9 @@ def _sort_levels_monotonic(self): return self new_levels = [] - new_labels = [] + new_codes = [] - for lev, lab in zip(self.levels, self.labels): + for lev, level_codes in zip(self.levels, self.codes): if not lev.is_monotonic: try: @@ -1513,15 +1542,15 @@ def _sort_levels_monotonic(self): else: lev = lev.take(indexer) - # indexer to reorder the labels + # indexer to reorder the level codes indexer = ensure_int64(indexer) ri = lib.get_reverse_indexer(indexer, len(indexer)) - lab = algos.take_1d(ri, lab) + level_codes = algos.take_1d(ri, level_codes) new_levels.append(lev) - new_labels.append(lab) + new_codes.append(level_codes) - return MultiIndex(new_levels, new_labels, + return MultiIndex(new_levels, new_codes, names=self.names, sortorder=self.sortorder, verify_integrity=False) @@ -1559,15 +1588,15 @@ def remove_unused_levels(self): """ 
new_levels = [] - new_labels = [] + new_codes = [] changed = False - for lev, lab in zip(self.levels, self.labels): + for lev, level_codes in zip(self.levels, self.codes): # Since few levels are typically unused, bincount() is more # efficient than unique() - however it only accepts positive values # (and drops order): - uniques = np.where(np.bincount(lab + 1) > 0)[0] - 1 + uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1 has_na = int(len(uniques) and (uniques[0] == -1)) if len(uniques) != len(lev) + has_na: @@ -1576,33 +1605,34 @@ def remove_unused_levels(self): # Recalculate uniques, now preserving order. # Can easily be cythonized by exploiting the already existing - # "uniques" and stop parsing "lab" when all items are found: - uniques = algos.unique(lab) + # "uniques" and stop parsing "level_codes" when all items + # are found: + uniques = algos.unique(level_codes) if has_na: na_idx = np.where(uniques == -1)[0] # Just ensure that -1 is in first position: uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]] - # labels get mapped from uniques to 0:len(uniques) + # codes get mapped from uniques to 0:len(uniques) # -1 (if present) is mapped to last position - label_mapping = np.zeros(len(lev) + has_na) + code_mapping = np.zeros(len(lev) + has_na) # ... 
and reassigned value -1: - label_mapping[uniques] = np.arange(len(uniques)) - has_na + code_mapping[uniques] = np.arange(len(uniques)) - has_na - lab = label_mapping[lab] + level_codes = code_mapping[level_codes] # new levels are simple lev = lev.take(uniques[has_na:]) new_levels.append(lev) - new_labels.append(lab) + new_codes.append(level_codes) result = self._shallow_copy() if changed: result._reset_identity() result._set_levels(new_levels, validate=False) - result._set_labels(new_labels, validate=False) + result._set_codes(new_codes, validate=False) return result @@ -1619,7 +1649,7 @@ def levshape(self): def __reduce__(self): """Necessary for making this object picklable""" d = dict(levels=[lev for lev in self.levels], - labels=[label for label in self.labels], + codes=[level_codes for level_codes in self.codes], sortorder=self.sortorder, names=list(self.names)) return ibase._new_Index, (self.__class__, d), None @@ -1628,17 +1658,17 @@ def __setstate__(self, state): if isinstance(state, dict): levels = state.get('levels') - labels = state.get('labels') + codes = state.get('codes') sortorder = state.get('sortorder') names = state.get('names') elif isinstance(state, tuple): nd_state, own_state = state - levels, labels, sortorder, names = own_state + levels, codes, sortorder, names = own_state self._set_levels([Index(x) for x in levels], validate=False) - self._set_labels(labels) + self._set_codes(codes) self._set_names(names) self.sortorder = sortorder self._verify_integrity() @@ -1649,11 +1679,11 @@ def __getitem__(self, key): key = com.cast_scalar_indexer(key) retval = [] - for lev, lab in zip(self.levels, self.labels): - if lab[key] == -1: + for lev, level_codes in zip(self.levels, self.codes): + if level_codes[key] == -1: retval.append(np.nan) else: - retval.append(lev[lab[key]]) + retval.append(lev[level_codes[key]]) return tuple(retval) else: @@ -1667,9 +1697,9 @@ def __getitem__(self, key): if isinstance(key, Index): key = np.asarray(key) - new_labels = 
[lab[key] for lab in self.labels] + new_codes = [level_codes[key] for level_codes in self.codes] - return MultiIndex(levels=self.levels, labels=new_labels, + return MultiIndex(levels=self.levels, codes=new_codes, names=self.names, sortorder=sortorder, verify_integrity=False) @@ -1678,11 +1708,11 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) indices = ensure_platform_int(indices) - taken = self._assert_take_fillable(self.labels, indices, + taken = self._assert_take_fillable(self.codes, indices, allow_fill=allow_fill, fill_value=fill_value, na_value=-1) - return MultiIndex(levels=self.levels, labels=taken, + return MultiIndex(levels=self.levels, codes=taken, names=self.names, verify_integrity=False) def _assert_take_fillable(self, values, indices, allow_fill=True, @@ -1694,7 +1724,7 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') raise ValueError(msg) - taken = [lab.take(indices) for lab in self.labels] + taken = [lab.take(indices) for lab in self.codes] mask = indices == -1 if mask.any(): masked = [] @@ -1704,7 +1734,7 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, masked.append(np.asarray(label_values)) taken = masked else: - taken = [lab.take(indices) for lab in self.labels] + taken = [lab.take(indices) for lab in self.codes] return taken def append(self, other): @@ -1746,21 +1776,23 @@ def argsort(self, *args, **kwargs): def repeat(self, repeats, *args, **kwargs): nv.validate_repeat(args, kwargs) return MultiIndex(levels=self.levels, - labels=[label.view(np.ndarray).repeat(repeats) - for label in self.labels], names=self.names, - sortorder=self.sortorder, verify_integrity=False) + codes=[level_codes.view(np.ndarray).repeat(repeats) + for level_codes in self.codes], + names=self.names, sortorder=self.sortorder, + verify_integrity=False) def where(self, cond, 
other=None): raise NotImplementedError(".where is not supported for " "MultiIndex operations") - def drop(self, labels, level=None, errors='raise'): + @deprecate_kwarg(old_arg_name='labels', new_arg_name='codes') + def drop(self, codes, level=None, errors='raise'): """ - Make new MultiIndex with passed list of labels deleted + Make new MultiIndex with passed list of codes deleted Parameters ---------- - labels : array-like + codes : array-like Must be a list of tuples level : int or level name, default None @@ -1769,24 +1801,24 @@ def drop(self, labels, level=None, errors='raise'): dropped : MultiIndex """ if level is not None: - return self._drop_from_level(labels, level) + return self._drop_from_level(codes, level) try: - if not isinstance(labels, (np.ndarray, Index)): - labels = com.index_labels_to_array(labels) - indexer = self.get_indexer(labels) + if not isinstance(codes, (np.ndarray, Index)): + codes = com.index_labels_to_array(codes) + indexer = self.get_indexer(codes) mask = indexer == -1 if mask.any(): if errors != 'ignore': - raise ValueError('labels %s not contained in axis' % - labels[mask]) + raise ValueError('codes %s not contained in axis' % + codes[mask]) except Exception: pass inds = [] - for label in labels: + for level_codes in codes: try: - loc = self.get_loc(label) + loc = self.get_loc(level_codes) # get_loc returns either an integer, a slice, or a boolean # mask if isinstance(loc, int): @@ -1811,13 +1843,13 @@ def drop(self, labels, level=None, errors='raise'): return self.delete(inds) - def _drop_from_level(self, labels, level): - labels = com.index_labels_to_array(labels) + def _drop_from_level(self, codes, level): + codes = com.index_labels_to_array(codes) i = self._get_level_number(level) index = self.levels[i] - values = index.get_indexer(labels) + values = index.get_indexer(codes) - mask = ~algos.isin(self.labels[i], values) + mask = ~algos.isin(self.codes[i], values) return self[mask] @@ -1855,7 +1887,7 @@ def swaplevel(self, i=-2, 
j=-1): Examples -------- >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], - ... labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) + ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) >>> mi MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) @@ -1864,17 +1896,17 @@ def swaplevel(self, i=-2, j=-1): labels=[[0, 1, 0, 1], [0, 0, 1, 1]]) """ new_levels = list(self.levels) - new_labels = list(self.labels) + new_codes = list(self.codes) new_names = list(self.names) i = self._get_level_number(i) j = self._get_level_number(j) new_levels[i], new_levels[j] = new_levels[j], new_levels[i] - new_labels[i], new_labels[j] = new_labels[j], new_labels[i] + new_codes[i], new_codes[j] = new_codes[j], new_codes[i] new_names[i], new_names[j] = new_names[j], new_names[i] - return MultiIndex(levels=new_levels, labels=new_labels, + return MultiIndex(levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False) def reorder_levels(self, order): @@ -1890,31 +1922,33 @@ def reorder_levels(self, order): 'number of levels (%d), got %d' % (self.nlevels, len(order))) new_levels = [self.levels[i] for i in order] - new_labels = [self.labels[i] for i in order] + new_codes = [self.codes[i] for i in order] new_names = [self.names[i] for i in order] - return MultiIndex(levels=new_levels, labels=new_labels, + return MultiIndex(levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False) def __getslice__(self, i, j): return self.__getitem__(slice(i, j)) - def _get_labels_for_sorting(self): + def _get_codes_for_sorting(self): """ - we categorizing our labels by using the - available catgories (all, not just observed) + we categorizing our codes by using the + available categories (all, not just observed) excluding any missing ones (-1); this is in preparation for sorting, where we need to disambiguate that -1 is not a valid valid """ from pandas.core.arrays import Categorical - def cats(label): - return np.arange(np.array(label).max() + 1 if len(label) else 
0, - dtype=label.dtype) + def cats(level_codes): + return np.arange(np.array(level_codes).max() + 1 if + len(level_codes) else 0, + dtype=level_codes.dtype) - return [Categorical.from_codes(label, cats(label), ordered=True) - for label in self.labels] + return [Categorical.from_codes(level_codes, cats(level_codes), + ordered=True) + for level_codes in self.codes] def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ @@ -1951,21 +1985,21 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): raise ValueError("level must have same length as ascending") from pandas.core.sorting import lexsort_indexer - indexer = lexsort_indexer([self.labels[lev] for lev in level], + indexer = lexsort_indexer([self.codes[lev] for lev in level], orders=ascending) # level ordering else: - labels = list(self.labels) + codes = list(self.codes) shape = list(self.levshape) - # partition labels and shape - primary = tuple(labels.pop(lev - i) for i, lev in enumerate(level)) + # partition codes and shape + primary = tuple(codes.pop(lev - i) for i, lev in enumerate(level)) primshp = tuple(shape.pop(lev - i) for i, lev in enumerate(level)) if sort_remaining: - primary += primary + tuple(labels) + primary += primary + tuple(codes) primshp += primshp + tuple(shape) else: sortorder = level[0] @@ -1977,9 +2011,9 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): indexer = indexer[::-1] indexer = ensure_platform_int(indexer) - new_labels = [lab.take(indexer) for lab in self.labels] + new_codes = [level_codes.take(indexer) for level_codes in self.codes] - new_index = MultiIndex(labels=new_labels, levels=self.levels, + new_index = MultiIndex(codes=new_codes, levels=self.levels, names=self.names, sortorder=sortorder, verify_integrity=False) @@ -2194,7 +2228,7 @@ def _partial_tup_index(self, tup, side='left'): n = len(tup) start, end = 0, len(self) - zipped = zip(tup, self.levels, self.labels) + zipped = zip(tup, self.levels, self.codes) for k, (lab, lev, 
labs) in enumerate(zipped): section = labs[start:end] @@ -2306,7 +2340,7 @@ def _maybe_to_slice(loc): loc = np.arange(start, stop, dtype='int64') for i, k in enumerate(follow_key, len(lead_key)): - mask = self.labels[i][loc] == self.levels[i].get_loc(k) + mask = self.codes[i][loc] == self.levels[i].get_loc(k) if not mask.all(): loc = loc[mask] if not len(loc): @@ -2457,15 +2491,16 @@ def _get_level_indexer(self, key, level=0, indexer=None): # if the indexer is provided, then use this level_index = self.levels[level] - labels = self.labels[level] + level_codes = self.codes[level] - def convert_indexer(start, stop, step, indexer=indexer, labels=labels): - # given the inputs and the labels/indexer, compute an indexer set + def convert_indexer(start, stop, step, indexer=indexer, + codes=level_codes): + # given the inputs and the codes/indexer, compute an indexer set # if we have a provided indexer, then this need not consider # the entire labels set r = np.arange(start, stop, step) - if indexer is not None and len(indexer) != len(labels): + if indexer is not None and len(indexer) != len(codes): # we have an indexer which maps the locations in the labels # that we have already selected (and is not an indexer for the @@ -2475,14 +2510,14 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): # selected from pandas import Series mapper = Series(indexer) - indexer = labels.take(ensure_platform_int(indexer)) + indexer = codes.take(ensure_platform_int(indexer)) result = Series(Index(indexer).isin(r).nonzero()[0]) m = result.map(mapper)._ndarray_values else: - m = np.zeros(len(labels), dtype=bool) - m[np.in1d(labels, r, - assume_unique=Index(labels).is_unique)] = True + m = np.zeros(len(codes), dtype=bool) + m[np.in1d(codes, r, + assume_unique=Index(codes).is_unique)] = True return m @@ -2522,8 +2557,8 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): return convert_indexer(start, stop + 1, step) else: # sorted, so can return slice 
object -> view - i = labels.searchsorted(start, side='left') - j = labels.searchsorted(stop, side='right') + i = level_codes.searchsorted(start, side='left') + j = level_codes.searchsorted(stop, side='right') return slice(i, j, step) else: @@ -2532,14 +2567,14 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): if level > 0 or self.lexsort_depth == 0: # Desired level is not sorted - locs = np.array(labels == code, dtype=bool, copy=False) + locs = np.array(level_codes == code, dtype=bool, copy=False) if not locs.any(): # The label is present in self.levels[level] but unused: raise KeyError(key) return locs - i = labels.searchsorted(code, side='left') - j = labels.searchsorted(code, side='right') + i = level_codes.searchsorted(code, side='left') + j = level_codes.searchsorted(code, side='right') if i == j: # The label is present in self.levels[level] but unused: raise KeyError(key) @@ -2689,10 +2724,10 @@ def truncate(self, before=None, after=None): new_levels = list(self.levels) new_levels[0] = new_levels[0][i:j] - new_labels = [lab[left:right] for lab in self.labels] - new_labels[0] = new_labels[0] - i + new_codes = [level_codes[left:right] for level_codes in self.codes] + new_codes[0] = new_codes[0] - i - return MultiIndex(levels=new_levels, labels=new_labels, + return MultiIndex(levels=new_levels, codes=new_codes, verify_integrity=False) def equals(self, other): @@ -2721,26 +2756,26 @@ def equals(self, other): return False for i in range(self.nlevels): - slabels = self.labels[i] - slabels = slabels[slabels != -1] - svalues = algos.take_nd(np.asarray(self.levels[i]._values), - slabels, allow_fill=False) - - olabels = other.labels[i] - olabels = olabels[olabels != -1] - ovalues = algos.take_nd( + self_codes = self.codes[i] + self_codes = self_codes[self_codes != -1] + self_values = algos.take_nd(np.asarray(self.levels[i]._values), + self_codes, allow_fill=False) + + other_codes = other.codes[i] + other_codes = other_codes[other_codes != -1] + 
other_values = algos.take_nd( np.asarray(other.levels[i]._values), - olabels, allow_fill=False) + other_codes, allow_fill=False) # since we use NaT both datetime64 and timedelta64 # we can have a situation where a level is typed say # timedelta64 in self (IOW it has other values than NaT) # but types datetime64 in other (where its all NaT) # but these are equivalent - if len(svalues) == 0 and len(ovalues) == 0: + if len(self_values) == 0 and len(other_values) == 0: continue - if not array_equivalent(svalues, ovalues): + if not array_equivalent(self_values, other_values): return False return True @@ -2806,7 +2841,7 @@ def intersection(self, other): uniq_tuples = sorted(set(self_tuples) & set(other_tuples)) if len(uniq_tuples) == 0: return MultiIndex(levels=self.levels, - labels=[[]] * self.nlevels, + codes=[[]] * self.nlevels, names=result_names, verify_integrity=False) else: return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, @@ -2836,7 +2871,7 @@ def difference(self, other, sort=True): if self.equals(other): return MultiIndex(levels=self.levels, - labels=[[]] * self.nlevels, + codes=[[]] * self.nlevels, names=result_names, verify_integrity=False) this = self._get_unique_index() @@ -2852,7 +2887,7 @@ def difference(self, other, sort=True): if len(difference) == 0: return MultiIndex(levels=[[]] * self.nlevels, - labels=[[]] * self.nlevels, + codes=[[]] * self.nlevels, names=result_names, verify_integrity=False) else: return MultiIndex.from_tuples(difference, sortorder=0, @@ -2878,7 +2913,7 @@ def _convert_can_do_setop(self, other): if not hasattr(other, 'names'): if len(other) == 0: other = MultiIndex(levels=[[]] * self.nlevels, - labels=[[]] * self.nlevels, + codes=[[]] * self.nlevels, verify_integrity=False) else: msg = 'other must be a MultiIndex or a list of tuples' @@ -2913,21 +2948,22 @@ def insert(self, loc, item): 'levels.') new_levels = [] - new_labels = [] - for k, level, labels in zip(item, self.levels, self.labels): + new_codes = [] + for k, 
level, level_codes in zip(item, self.levels, self.codes): if k not in level: # have to insert into level # must insert at end otherwise you have to recompute all the - # other labels + # other codes lev_loc = len(level) level = level.insert(lev_loc, k) else: lev_loc = level.get_loc(k) new_levels.append(level) - new_labels.append(np.insert(ensure_int64(labels), loc, lev_loc)) + new_codes.append(np.insert( + ensure_int64(level_codes), loc, lev_loc)) - return MultiIndex(levels=new_levels, labels=new_labels, + return MultiIndex(levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False) def delete(self, loc): @@ -2938,8 +2974,8 @@ def delete(self, loc): ------- new_index : MultiIndex """ - new_labels = [np.delete(lab, loc) for lab in self.labels] - return MultiIndex(levels=self.levels, labels=new_labels, + new_codes = [np.delete(level_codes, loc) for level_codes in self.codes] + return MultiIndex(levels=self.levels, codes=new_codes, names=self.names, verify_integrity=False) def _wrap_joined_index(self, joined, other): @@ -2955,13 +2991,13 @@ def isin(self, values, level=None): else: num = self._get_level_number(level) levs = self.levels[num] - labs = self.labels[num] + level_codes = self.codes[num] sought_labels = levs.isin(values).nonzero()[0] if levs.size == 0: - return np.zeros(len(labs), dtype=np.bool_) + return np.zeros(len(level_codes), dtype=np.bool_) else: - return np.lib.arraysetops.in1d(labs, sought_labels) + return np.lib.arraysetops.in1d(level_codes, sought_labels) MultiIndex._add_numeric_methods_disabled() diff --git a/pandas/core/panel.py b/pandas/core/panel.py index bfa00d1352401..bb3412a3d7c0c 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -953,46 +953,46 @@ def to_frame(self, filter_observations=True): def construct_multi_parts(idx, n_repeat, n_shuffle=1): # Replicates and shuffles MultiIndex, returns individual attributes - labels = [np.repeat(x, n_repeat) for x in idx.labels] + codes = [np.repeat(x, n_repeat) for x 
in idx.codes] # Assumes that each label is divisible by n_shuffle - labels = [x.reshape(n_shuffle, -1).ravel(order='F') - for x in labels] - labels = [x[selector] for x in labels] + codes = [x.reshape(n_shuffle, -1).ravel(order='F') + for x in codes] + codes = [x[selector] for x in codes] levels = idx.levels names = idx.names - return labels, levels, names + return codes, levels, names def construct_index_parts(idx, major=True): levels = [idx] if major: - labels = [np.arange(N).repeat(K)[selector]] + codes = [np.arange(N).repeat(K)[selector]] names = idx.name or 'major' else: - labels = np.arange(K).reshape(1, K)[np.zeros(N, dtype=int)] - labels = [labels.ravel()[selector]] + codes = np.arange(K).reshape(1, K)[np.zeros(N, dtype=int)] + codes = [codes.ravel()[selector]] names = idx.name or 'minor' names = [names] - return labels, levels, names + return codes, levels, names if isinstance(self.major_axis, MultiIndex): - major_labels, major_levels, major_names = construct_multi_parts( + major_codes, major_levels, major_names = construct_multi_parts( self.major_axis, n_repeat=K) else: - major_labels, major_levels, major_names = construct_index_parts( + major_codes, major_levels, major_names = construct_index_parts( self.major_axis) if isinstance(self.minor_axis, MultiIndex): - minor_labels, minor_levels, minor_names = construct_multi_parts( + minor_codes, minor_levels, minor_names = construct_multi_parts( self.minor_axis, n_repeat=N, n_shuffle=K) else: - minor_labels, minor_levels, minor_names = construct_index_parts( + minor_codes, minor_levels, minor_names = construct_index_parts( self.minor_axis, major=False) levels = major_levels + minor_levels - labels = major_labels + minor_labels + codes = major_codes + minor_codes names = major_names + minor_names - index = MultiIndex(levels=levels, labels=labels, names=names, + index = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False) return DataFrame(data, index=index, columns=self.items) diff --git 
a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index f01c9d29fd457..b13b22d2e8266 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -555,9 +555,9 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): levels = [ensure_index(x) for x in levels] if not _all_indexes_same(indexes): - label_list = [] + codes_list = [] - # things are potentially different sizes, so compute the exact labels + # things are potentially different sizes, so compute the exact codes # for each level and pass those to MultiIndex.from_arrays for hlevel, level in zip(zipped, levels): @@ -570,18 +570,18 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): .format(key=key, level=level)) to_concat.append(np.repeat(i, len(index))) - label_list.append(np.concatenate(to_concat)) + codes_list.append(np.concatenate(to_concat)) concat_index = _concat_indexes(indexes) # these go at the end if isinstance(concat_index, MultiIndex): levels.extend(concat_index.levels) - label_list.extend(concat_index.labels) + codes_list.extend(concat_index.codes) else: codes, categories = _factorize_from_iterable(concat_index) levels.append(categories) - label_list.append(codes) + codes_list.append(codes) if len(names) == len(levels): names = list(names) @@ -594,7 +594,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): # also copies names = names + _get_consensus_names(indexes) - return MultiIndex(levels=levels, labels=label_list, names=names, + return MultiIndex(levels=levels, codes=codes_list, names=names, verify_integrity=False) new_index = indexes[0] @@ -605,8 +605,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): new_names = list(names) new_levels = list(levels) - # construct labels - new_labels = [] + # construct codes + new_codes = [] # do something a bit more speedy @@ -619,17 +619,17 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): raise ValueError('Values not found 
in passed level: {hlevel!s}' .format(hlevel=hlevel[mask])) - new_labels.append(np.repeat(mapped, n)) + new_codes.append(np.repeat(mapped, n)) if isinstance(new_index, MultiIndex): new_levels.extend(new_index.levels) - new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels]) + new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes]) else: new_levels.append(new_index) - new_labels.append(np.tile(np.arange(n), kpieces)) + new_codes.append(np.tile(np.arange(n), kpieces)) if len(new_names) < len(new_levels): new_names.extend(new_index.names) - return MultiIndex(levels=new_levels, labels=new_labels, names=new_names, + return MultiIndex(levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index b078ff32f6944..c0c016f9a8caa 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -857,9 +857,9 @@ def _get_merge_keys(self): left_keys.append(left._get_label_or_level_values(k)) join_names.append(k) if isinstance(self.right.index, MultiIndex): - right_keys = [lev._values.take(lab) - for lev, lab in zip(self.right.index.levels, - self.right.index.labels)] + right_keys = [lev._values.take(lev_codes) for lev, lev_codes + in zip(self.right.index.levels, + self.right.index.codes)] else: right_keys = [self.right.index.values] elif _any(self.right_on): @@ -871,9 +871,9 @@ def _get_merge_keys(self): right_keys.append(right._get_label_or_level_values(k)) join_names.append(k) if isinstance(self.left.index, MultiIndex): - left_keys = [lev._values.take(lab) - for lev, lab in zip(self.left.index.levels, - self.left.index.labels)] + left_keys = [lev._values.take(lev_codes) for lev, lev_codes + in zip(self.left.index.levels, + self.left.index.codes)] else: left_keys = [self.left.index.values] @@ -1172,7 +1172,7 @@ def _convert_to_mulitindex(index): join_index = _convert_to_mulitindex(join_index) join_levels = join_index.levels - join_labels = 
join_index.labels + join_codes = join_index.codes join_names = join_index.names # lindexer and rindexer hold the indexes where the join occurred @@ -1197,16 +1197,16 @@ def _convert_to_mulitindex(index): name_idx = idx.names.index(dropped_level_name) restore_levels = idx.levels[name_idx] - # Inject -1 in the labels list where a join was not possible + # Inject -1 in the codes list where a join was not possible # IOW indexer[i]=-1 - labels = idx.labels[name_idx] - restore_labels = algos.take_nd(labels, indexer, fill_value=-1) + codes = idx.codes[name_idx] + restore_codes = algos.take_nd(codes, indexer, fill_value=-1) join_levels = join_levels + [restore_levels] - join_labels = join_labels + [restore_labels] + join_codes = join_codes + [restore_codes] join_names = join_names + [dropped_level_name] - return join_levels, join_labels, join_names + return join_levels, join_codes, join_names class _OrderedMerge(_MergeOperation): @@ -1508,27 +1508,29 @@ def _get_multiindex_indexer(join_keys, index, sort): fkeys = partial(_factorize_keys, sort=sort) # left & right join labels and num. 
of levels at each location - rlab, llab, shape = map(list, zip(* map(fkeys, index.levels, join_keys))) + rcodes, lcodes, shape = map(list, zip(* map(fkeys, + index.levels, + join_keys))) if sort: - rlab = list(map(np.take, rlab, index.labels)) + rcodes = list(map(np.take, rcodes, index.codes)) else: i8copy = lambda a: a.astype('i8', subok=False, copy=True) - rlab = list(map(i8copy, index.labels)) + rcodes = list(map(i8copy, index.codes)) # fix right labels if there were any nulls for i in range(len(join_keys)): - mask = index.labels[i] == -1 + mask = index.codes[i] == -1 if mask.any(): # check if there already was any nulls at this location # if there was, it is factorized to `shape[i] - 1` - a = join_keys[i][llab[i] == shape[i] - 1] + a = join_keys[i][lcodes[i] == shape[i] - 1] if a.size == 0 or not a[0] != a[0]: shape[i] += 1 - rlab[i][mask] = shape[i] - 1 + rcodes[i][mask] = shape[i] - 1 # get flat i8 join keys - lkey, rkey = _get_join_keys(llab, rlab, shape, sort) + lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort) # factorize keys to a dense i8 space lkey, rkey, count = fkeys(lkey, rkey) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 065728fb239ae..ba86d3d9ba25f 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -100,7 +100,7 @@ def __init__(self, values, index, level=-1, value_columns=None, self.level = self.index._get_level_number(level) # when index includes `nan`, need to lift levels/strides by 1 - self.lift = 1 if -1 in self.index.labels[self.level] else 0 + self.lift = 1 if -1 in self.index.codes[self.level] else 0 self.new_index_levels = list(self.index.levels) self.new_index_names = list(self.index.names) @@ -115,9 +115,9 @@ def __init__(self, values, index, level=-1, value_columns=None, def _make_sorted_values_labels(self): v = self.level - labs = list(self.index.labels) + codes = list(self.index.codes) levs = list(self.index.levels) - to_sort = labs[:v] + labs[v + 1:] + 
[labs[v]] + to_sort = codes[:v] + codes[v + 1:] + [codes[v]] sizes = [len(x) for x in levs[:v] + levs[v + 1:] + [levs[v]]] comp_index, obs_ids = get_compressed_ids(to_sort, sizes) @@ -243,16 +243,16 @@ def get_new_columns(self): new_levels = self.value_columns.levels + (self.removed_level_full,) new_names = self.value_columns.names + (self.removed_name,) - new_labels = [lab.take(propagator) - for lab in self.value_columns.labels] + new_codes = [lab.take(propagator) + for lab in self.value_columns.codes] else: new_levels = [self.value_columns, self.removed_level_full] new_names = [self.value_columns.name, self.removed_name] - new_labels = [propagator] + new_codes = [propagator] # The two indices differ only if the unstacked level had unused items: if len(self.removed_level_full) != len(self.removed_level): - # In this case, we remap the new labels to the original level: + # In this case, we remap the new codes to the original level: repeater = self.removed_level_full.get_indexer(self.removed_level) if self.lift: repeater = np.insert(repeater, 0, -1) @@ -261,22 +261,22 @@ def get_new_columns(self): repeater = np.arange(stride) - self.lift # The entire level is then just a repetition of the single chunk: - new_labels.append(np.tile(repeater, width)) - return MultiIndex(levels=new_levels, labels=new_labels, + new_codes.append(np.tile(repeater, width)) + return MultiIndex(levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False) def get_new_index(self): - result_labels = [lab.take(self.compressor) - for lab in self.sorted_labels[:-1]] + result_codes = [lab.take(self.compressor) + for lab in self.sorted_labels[:-1]] # construct the new index if len(self.new_index_levels) == 1: - lev, lab = self.new_index_levels[0], result_labels[0] + lev, lab = self.new_index_levels[0], result_codes[0] if (lab == -1).any(): lev = lev.insert(len(lev), lev._na_value) return lev.take(lab) - return MultiIndex(levels=self.new_index_levels, labels=result_labels, + return 
MultiIndex(levels=self.new_index_levels, codes=result_codes, names=self.new_index_names, verify_integrity=False) @@ -293,25 +293,25 @@ def _unstack_multiple(data, clocs, fill_value=None): rlocs = [i for i in range(index.nlevels) if i not in clocs] clevels = [index.levels[i] for i in clocs] - clabels = [index.labels[i] for i in clocs] + ccodes = [index.codes[i] for i in clocs] cnames = [index.names[i] for i in clocs] rlevels = [index.levels[i] for i in rlocs] - rlabels = [index.labels[i] for i in rlocs] + rcodes = [index.codes[i] for i in rlocs] rnames = [index.names[i] for i in rlocs] shape = [len(x) for x in clevels] - group_index = get_group_index(clabels, shape, sort=False, xnull=False) + group_index = get_group_index(ccodes, shape, sort=False, xnull=False) comp_ids, obs_ids = compress_group_index(group_index, sort=False) - recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels, - xnull=False) + recons_codes = decons_obs_group_ids(comp_ids, obs_ids, shape, ccodes, + xnull=False) if rlocs == []: # Everything is in clocs, so the dummy df has a regular index dummy_index = Index(obs_ids, name='__placeholder__') else: dummy_index = MultiIndex(levels=rlevels + [obs_ids], - labels=rlabels + [comp_ids], + codes=rcodes + [comp_ids], names=rnames + ['__placeholder__'], verify_integrity=False) @@ -322,7 +322,7 @@ def _unstack_multiple(data, clocs, fill_value=None): unstacked = dummy.unstack('__placeholder__', fill_value=fill_value) new_levels = clevels new_names = cnames - new_labels = recons_labels + new_codes = recons_codes else: if isinstance(data.columns, MultiIndex): result = data @@ -344,11 +344,11 @@ def _unstack_multiple(data, clocs, fill_value=None): new_levels = [unstcols.levels[0]] + clevels new_names = [data.columns.name] + cnames - new_labels = [unstcols.labels[0]] - for rec in recons_labels: - new_labels.append(rec.take(unstcols.labels[-1])) + new_codes = [unstcols.codes[0]] + for rec in recons_codes: + 
new_codes.append(rec.take(unstcols.codes[-1])) - new_columns = MultiIndex(levels=new_levels, labels=new_labels, + new_columns = MultiIndex(levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False) if isinstance(unstacked, Series): @@ -467,21 +467,21 @@ def factorize(index): return _stack_multi_columns(frame, level_num=level_num, dropna=dropna) elif isinstance(frame.index, MultiIndex): new_levels = list(frame.index.levels) - new_labels = [lab.repeat(K) for lab in frame.index.labels] + new_codes = [lab.repeat(K) for lab in frame.index.codes] clev, clab = factorize(frame.columns) new_levels.append(clev) - new_labels.append(np.tile(clab, N).ravel()) + new_codes.append(np.tile(clab, N).ravel()) new_names = list(frame.index.names) new_names.append(frame.columns.name) - new_index = MultiIndex(levels=new_levels, labels=new_labels, + new_index = MultiIndex(levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False) else: levels, (ilab, clab) = zip(*map(factorize, (frame.index, frame.columns))) - labels = ilab.repeat(K), np.tile(clab, N).ravel() - new_index = MultiIndex(levels=levels, labels=labels, + codes = ilab.repeat(K), np.tile(clab, N).ravel() + new_index = MultiIndex(levels=levels, codes=codes, names=[frame.index.name, frame.columns.name], verify_integrity=False) @@ -592,9 +592,9 @@ def _convert_level_number(level_num, columns): # tuple list excluding level for grouping columns if len(frame.columns.levels) > 2: - tuples = list(zip(*[lev.take(lab) - for lev, lab in zip(this.columns.levels[:-1], - this.columns.labels[:-1])])) + tuples = list(zip(*[lev.take(level_codes) for lev, level_codes + in zip(this.columns.levels[:-1], + this.columns.codes[:-1])])) unique_groups = [key for key, _ in itertools.groupby(tuples)] new_names = this.columns.names[:-1] new_columns = MultiIndex.from_tuples(unique_groups, names=new_names) @@ -604,9 +604,9 @@ def _convert_level_number(level_num, columns): # time to ravel the values new_data = {} level_vals 
= this.columns.levels[-1] - level_labels = sorted(set(this.columns.labels[-1])) - level_vals_used = level_vals[level_labels] - levsize = len(level_labels) + level_codes = sorted(set(this.columns.codes[-1])) + level_vals_used = level_vals[level_codes] + levsize = len(level_codes) drop_cols = [] for key in unique_groups: try: @@ -625,8 +625,8 @@ def _convert_level_number(level_num, columns): slice_len = loc.stop - loc.start if slice_len != levsize: - chunk = this[this.columns[loc]] - chunk.columns = level_vals.take(chunk.columns.labels[-1]) + chunk = this.loc[:, this.columns[loc]] + chunk.columns = level_vals.take(chunk.columns.codes[-1]) value_slice = chunk.reindex(columns=level_vals_used).values else: if (frame._is_homogeneous_type and @@ -660,17 +660,17 @@ def _convert_level_number(level_num, columns): if isinstance(this.index, MultiIndex): new_levels = list(this.index.levels) new_names = list(this.index.names) - new_labels = [lab.repeat(levsize) for lab in this.index.labels] + new_codes = [lab.repeat(levsize) for lab in this.index.codes] else: new_levels = [this.index] - new_labels = [np.arange(N).repeat(levsize)] + new_codes = [np.arange(N).repeat(levsize)] new_names = [this.index.name] # something better? 
new_levels.append(level_vals) - new_labels.append(np.tile(level_labels, N)) + new_codes.append(np.tile(level_codes, N)) new_names.append(frame.columns.names[level_num]) - new_index = MultiIndex(levels=new_levels, labels=new_labels, + new_index = MultiIndex(levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False) result = frame._constructor(new_data, index=new_index, columns=new_columns) @@ -979,13 +979,13 @@ def make_axis_dummies(frame, axis='minor', transform=None): num = numbers.get(axis, axis) items = frame.index.levels[num] - labels = frame.index.labels[num] + codes = frame.index.codes[num] if transform is not None: mapped_items = items.map(transform) - labels, items = _factorize_from_iterable(mapped_items.take(labels)) + codes, items = _factorize_from_iterable(mapped_items.take(codes)) values = np.eye(len(items), dtype=float) - values = values.take(labels, axis=0) + values = values.take(codes, axis=0) return DataFrame(values, columns=items, index=frame.index) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6b4c9927ef0f1..c9ef2bc9f8a3c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1463,14 +1463,14 @@ def count(self, level=None): level = self.index._get_level_number(level) lev = self.index.levels[level] - lab = np.array(self.index.labels[level], subok=False, copy=True) + level_codes = np.array(self.index.codes[level], subok=False, copy=True) - mask = lab == -1 + mask = level_codes == -1 if mask.any(): - lab[mask] = cnt = len(lev) + level_codes[mask] = cnt = len(lev) lev = lev.insert(cnt, lev._na_value) - obs = lab[notna(self.values)] + obs = level_codes[notna(self.values)] out = np.bincount(obs, minlength=len(lev) or None) return self._constructor(out, index=lev, dtype='int64').__finalize__(self) @@ -2829,7 +2829,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, elif isinstance(index, MultiIndex): from pandas.core.sorting import lexsort_indexer labels = 
index._sort_levels_monotonic() - indexer = lexsort_indexer(labels._get_labels_for_sorting(), + indexer = lexsort_indexer(labels._get_codes_for_sorting(), orders=ascending, na_position=na_position) else: @@ -3663,8 +3663,8 @@ def drop(self, labels=None, axis=0, index=None, columns=None, >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], ... ['speed', 'weight', 'length']], - ... labels=[[0, 0, 0, 1, 1, 1, 2, 2, 2], - ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) + ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], + ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], ... index=midx) >>> s diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 8fc6a8d8e923f..586193fe11850 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -967,7 +967,7 @@ def stack_sparse_frame(frame): nobs = sum(lengths) # this is pretty fast - minor_labels = np.repeat(np.arange(len(frame.columns)), lengths) + minor_codes = np.repeat(np.arange(len(frame.columns)), lengths) inds_to_concat = [] vals_to_concat = [] @@ -982,10 +982,10 @@ def stack_sparse_frame(frame): inds_to_concat.append(int_index.indices) vals_to_concat.append(series.sp_values) - major_labels = np.concatenate(inds_to_concat) + major_codes = np.concatenate(inds_to_concat) stacked_values = np.concatenate(vals_to_concat) index = MultiIndex(levels=[frame.index, frame.columns], - labels=[major_labels, minor_labels], + codes=[major_codes, minor_codes], verify_integrity=False) lp = DataFrame(stacked_values.reshape((nobs, 1)), index=index, diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 4be70c530b6b6..29fc1e3671a83 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -146,7 +146,7 @@ def hash_tuples(vals, encoding='utf8', hash_key=None): vals = MultiIndex.from_tuples(vals) # create a list-of-Categoricals - vals = [Categorical(vals.labels[level], + vals = [Categorical(vals.codes[level], vals.levels[level], 
ordered=False, fastpath=True) diff --git a/pandas/core/window.py b/pandas/core/window.py index 68a36fb2a6999..6c4dde54bd061 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -2462,7 +2462,7 @@ def dataframe_from_int_dict(data, frame_template): # empty result result = DataFrame( index=MultiIndex(levels=[arg1.index, arg2.columns], - labels=[[], []]), + codes=[[], []]), columns=arg2.columns, dtype='float64') diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index c2ea3715b9f3b..d74722996a660 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -431,9 +431,9 @@ def _format_header_mi(self): name = columns.names[lnum] yield ExcelCell(lnum, coloffset, name, self.header_style) - for lnum, (spans, levels, labels) in enumerate(zip( - level_lengths, columns.levels, columns.labels)): - values = levels.take(labels) + for lnum, (spans, levels, level_codes) in enumerate(zip( + level_lengths, columns.levels, columns.codes)): + values = levels.take(level_codes) for i in spans: if spans[i] > 1: yield ExcelCell(lnum, coloffset + i + 1, values[i], @@ -574,11 +574,11 @@ def _format_hierarchical_rows(self): names=False) level_lengths = get_level_lengths(level_strs) - for spans, levels, labels in zip(level_lengths, - self.df.index.levels, - self.df.index.labels): + for spans, levels, level_codes in zip(level_lengths, + self.df.index.levels, + self.df.index.codes): - values = levels.take(labels, + values = levels.take(level_codes, allow_fill=levels._can_hold_na, fill_value=True) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 640034cb49d25..8132c458ce852 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2605,9 +2605,9 @@ def read_sparse_intindex(self, key, **kwargs): def write_multi_index(self, key, index): setattr(self.attrs, '%s_nlevels' % key, index.nlevels) - for i, (lev, lab, name) in enumerate(zip(index.levels, - index.labels, - index.names)): + for i, (lev, level_codes, name) in 
enumerate(zip(index.levels, + index.codes, + index.names)): # write the level level_key = '%s_level%d' % (key, i) conv_level = _convert_index(lev, self.encoding, self.errors, @@ -2622,13 +2622,13 @@ def write_multi_index(self, key, index): # write the labels label_key = '%s_label%d' % (key, i) - self.write_array(label_key, lab) + self.write_array(label_key, level_codes) def read_multi_index(self, key, **kwargs): nlevels = getattr(self.attrs, '%s_nlevels' % key) levels = [] - labels = [] + codes = [] names = [] for i in range(nlevels): level_key = '%s_level%d' % (key, i) @@ -2638,10 +2638,10 @@ def read_multi_index(self, key, **kwargs): names.append(name) label_key = '%s_label%d' % (key, i) - lab = self.read_array(label_key, **kwargs) - labels.append(lab) + level_codes = self.read_array(label_key, **kwargs) + codes.append(level_codes) - return MultiIndex(levels=levels, labels=labels, names=names, + return MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=True) def read_index_node(self, node, start=None, stop=None): diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 33128a8ab179a..ac00e6a063104 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -712,9 +712,9 @@ def test_rename_bug2(self): def test_reorder_levels(self): index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], - labels=[[0, 0, 0, 0, 0, 0], - [0, 1, 2, 0, 1, 2], - [0, 1, 0, 1, 0, 1]], + codes=[[0, 0, 0, 0, 0, 0], + [0, 1, 2, 0, 1, 2], + [0, 1, 0, 1, 0, 1]], names=['L0', 'L1', 'L2']) df = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, index=index) @@ -729,9 +729,9 @@ def test_reorder_levels(self): # rotate, position result = df.reorder_levels([1, 2, 0]) e_idx = MultiIndex(levels=[['one', 'two', 'three'], [0, 1], ['bar']], - labels=[[0, 1, 2, 0, 1, 2], - [0, 1, 0, 1, 0, 1], - [0, 0, 0, 0, 0, 0]], + codes=[[0, 1, 2, 0, 1, 2], + [0, 1, 0, 1, 0, 1], + [0, 0, 0, 0, 0, 0]], 
names=['L1', 'L2', 'L0']) expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, index=e_idx) @@ -739,9 +739,9 @@ def test_reorder_levels(self): result = df.reorder_levels([0, 0, 0]) e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']], - labels=[[0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0]], + codes=[[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]], names=['L0', 'L0', 'L0']) expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, index=e_idx) @@ -757,9 +757,9 @@ def test_reset_index(self, float_frame): names = ['first', 'second'] stacked.index.names = names deleveled = stacked.reset_index() - for i, (lev, lab) in enumerate(zip(stacked.index.levels, - stacked.index.labels)): - values = lev.take(lab) + for i, (lev, level_codes) in enumerate(zip(stacked.index.levels, + stacked.index.codes)): + values = lev.take(level_codes) name = names[i] tm.assert_index_equal(values, Index(deleveled[name])) @@ -1093,7 +1093,7 @@ def test_rename_axis_style_raises(self): df.rename(id, mapper=id) def test_reindex_api_equivalence(self): - # equivalence of the labels/axis and index/columns API's + # equivalence of the labels/axis and index/columns API's df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], index=['a', 'b', 'c'], columns=['d', 'e', 'f']) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 2bf2dd593184f..6c30f3fb02fb0 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1008,9 +1008,9 @@ def alt(x): assert_stat_op_api('kurt', float_frame, float_string_frame) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], - labels=[[0, 0, 0, 0, 0, 0], - [0, 1, 2, 0, 1, 2], - [0, 1, 0, 1, 0, 1]]) + codes=[[0, 0, 0, 0, 0, 0], + [0, 1, 2, 0, 1, 2], + [0, 1, 0, 1, 0, 1]]) df = DataFrame(np.random.randn(6, 3), index=index) kurt = df.kurt() diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 
0a61c844f1af8..b95dad422e90a 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -3191,7 +3191,7 @@ def test_type_error_multiindex(self): index = Index(range(2), name='i') columns = MultiIndex(levels=[['x', 'y'], [0, 1]], - labels=[[0, 1], [0, 0]], + codes=[[0, 1], [0, 0]], names=[None, 'c']) expected = DataFrame([[1, 2], [3, 4]], columns=columns, index=index) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index a53b01466c7a4..bc9a760bc9f1d 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -465,14 +465,14 @@ def test_unstack_level_binding(self): mi = pd.MultiIndex( levels=[[u('foo'), u('bar')], [u('one'), u('two')], [u('a'), u('b')]], - labels=[[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]], names=[u('first'), u('second'), u('third')]) s = pd.Series(0, index=mi) result = s.unstack([1, 2]).stack(0) expected_mi = pd.MultiIndex( levels=[['foo', 'bar'], ['one', 'two']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=['first', 'second']) expected = pd.DataFrame(np.array([[np.nan, 0], @@ -499,7 +499,7 @@ def test_unstack_to_series(self): result = data.unstack() midx = MultiIndex(levels=[['x', 'y'], ['a', 'b', 'c']], - labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) expected = Series([1, 2, np.NaN, 3, 4, np.NaN], index=midx) assert_series_equal(result, expected) @@ -574,7 +574,7 @@ def test_unstack_non_unique_index_names(self): df.T.stack('c1') def test_unstack_unused_levels(self): - # GH 17845: unused labels in index make unstack() cast int to float + # GH 17845: unused codes in index make unstack() cast int to float idx = pd.MultiIndex.from_product([['a'], ['A', 'B', 'C', 'D']])[:-1] df = pd.DataFrame([[1, 0]] * 3, index=idx) @@ -587,8 +587,8 @@ def test_unstack_unused_levels(self): # Unused items on both levels levels 
= [[0, 1, 7], [0, 1, 2, 3]] - labels = [[0, 0, 1, 1], [0, 2, 0, 2]] - idx = pd.MultiIndex(levels, labels) + codes = [[0, 0, 1, 1], [0, 2, 0, 2]] + idx = pd.MultiIndex(levels, codes) block = np.arange(4).reshape(2, 2) df = pd.DataFrame(np.concatenate([block, block + 4]), index=idx) result = df.unstack() @@ -600,8 +600,8 @@ def test_unstack_unused_levels(self): # With mixed dtype and NaN levels = [['a', 2, 'c'], [1, 3, 5, 7]] - labels = [[0, -1, 1, 1], [0, 2, -1, 2]] - idx = pd.MultiIndex(levels, labels) + codes = [[0, -1, 1, 1], [0, 2, -1, 2]] + idx = pd.MultiIndex(levels, codes) data = np.arange(8) df = pd.DataFrame(data.reshape(4, 2), index=idx) @@ -620,7 +620,7 @@ def test_unstack_unused_levels(self): @pytest.mark.parametrize("cols", [['A', 'C'], slice(None)]) def test_unstack_unused_level(self, cols): - # GH 18562 : unused labels on the unstacked level + # GH 18562 : unused codes on the unstacked level df = pd.DataFrame([[2010, 'a', 'I'], [2011, 'b', 'II']], columns=['A', 'B', 'C']) @@ -693,7 +693,7 @@ def verify(df): vals = list(map(list, zip(*vals))) idx = Index([nan, 0, 1, 2, 4, 5, 6, 7], name='B') cols = MultiIndex(levels=[['C'], ['a', 'b']], - labels=[[0, 0], [0, 1]], + codes=[[0, 0], [0, 1]], names=[None, 'A']) right = DataFrame(vals, columns=cols, index=idx) @@ -706,7 +706,7 @@ def verify(df): vals = [[2, nan], [0, 4], [1, 5], [nan, 6], [3, 7]] cols = MultiIndex(levels=[['C'], ['a', 'b']], - labels=[[0, 0], [0, 1]], + codes=[[0, 0], [0, 1]], names=[None, 'A']) idx = Index([nan, 0, 1, 2, 3], name='B') right = DataFrame(vals, columns=cols, index=idx) @@ -719,7 +719,7 @@ def verify(df): vals = [[3, nan], [0, 4], [1, 5], [2, 6], [nan, 7]] cols = MultiIndex(levels=[['C'], ['a', 'b']], - labels=[[0, 0], [0, 1]], + codes=[[0, 0], [0, 1]], names=[None, 'A']) idx = Index([nan, 0, 1, 2, 3], name='B') right = DataFrame(vals, columns=cols, index=idx) @@ -737,7 +737,7 @@ def verify(df): vals = np.array([[3, 0, 1, 2, nan, 4], [nan, 5, 6, 7, 8, 9]]) idx = Index(['a', 
'b'], name='A') cols = MultiIndex(levels=[['C'], date_range('2012-01-01', periods=5)], - labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], + codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], names=[None, 'B']) right = DataFrame(vals, columns=cols, index=idx) @@ -759,11 +759,11 @@ def verify(df): [0.0, -0.00015, nan, 2.3614e-05, nan]] idx = MultiIndex(levels=[[680585148, 680607017], [0.0133]], - labels=[[0, 1], [-1, 0]], + codes=[[0, 1], [-1, 0]], names=['s_id', 'dosage']) cols = MultiIndex(levels=[['change'], ['Ag', 'Hg', 'Pb', 'Sn', 'U']], - labels=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]], + codes=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]], names=[None, 'agent']) right = DataFrame(vals, columns=cols, index=idx) @@ -851,8 +851,8 @@ def _test_stack_with_multiindex(multiindex): expected = DataFrame([[0, 2], [1, nan], [3, 5], [4, nan]], index=MultiIndex( levels=[[0, 1], ['u', 'x', 'y', 'z']], - labels=[[0, 0, 1, 1], - [1, 3, 1, 3]], + codes=[[0, 0, 1, 1], + [1, 3, 1, 3]], names=[None, 'Lower']), columns=Index(['B', 'C'], name='Upper'), dtype=df.dtypes[0]) diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py index 877aa835ac6f5..657da422bf02c 100644 --- a/pandas/tests/groupby/conftest.py +++ b/pandas/tests/groupby/conftest.py @@ -8,8 +8,8 @@ def mframe(): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) return DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 3692d34afcc03..f0d0ac246a251 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -190,7 +190,7 @@ def test_level_get_group(observed): df = DataFrame(data=np.arange(2, 22, 2), index=MultiIndex( 
levels=[pd.CategoricalIndex(["a", "b"]), range(10)], - labels=[[0] * 5 + [1] * 5, range(10)], + codes=[[0] * 5 + [1] * 5, range(10)], names=["Index1", "Index2"])) g = df.groupby(level=["Index1"], observed=observed) @@ -199,7 +199,7 @@ def test_level_get_group(observed): expected = DataFrame(data=np.arange(2, 12, 2), index=pd.MultiIndex(levels=[pd.CategoricalIndex( ["a", "b"]), range(5)], - labels=[[0] * 5, range(5)], + codes=[[0] * 5, range(5)], names=["Index1", "Index2"])) result = g.get_group('a') diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index a14b6ff014f37..8b9f3607d5c3e 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -218,7 +218,7 @@ def test_count_with_only_nans_in_first_group(self): df = DataFrame({'A': [np.nan, np.nan], 'B': ['a', 'b'], 'C': [1, 2]}) result = df.groupby(['A', 'B']).C.count() mi = MultiIndex(levels=[[], ['a', 'b']], - labels=[[], []], + codes=[[], []], names=['A', 'B']) expected = Series([], index=mi, dtype=np.int64, name='C') assert_series_equal(result, expected, check_index_type=False) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 646445623778b..310a2fb1e609d 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -247,7 +247,7 @@ def test_non_cython_api(): expected_col = pd.MultiIndex(levels=[['B'], ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']], - labels=[[0] * 8, list(range(8))]) + codes=[[0] * 8, list(range(8))]) expected = pd.DataFrame([[1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0], [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]], @@ -733,7 +733,7 @@ def test_frame_describe_multikey(tsframe): # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], - labels=[[0] * len(group.columns), range(len(group.columns))]) + codes=[[0] * len(group.columns), 
range(len(group.columns))]) group = pd.DataFrame(group.values, columns=group_col, index=group.index) @@ -747,7 +747,7 @@ def test_frame_describe_multikey(tsframe): expected = tsframe.describe().T expected.index = pd.MultiIndex( levels=[[0, 1], expected.index], - labels=[[0, 0, 1, 1], range(len(expected.index))]) + codes=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 162800b68de4f..6d9f60df45ec8 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -76,7 +76,7 @@ def test_basic(dtype): def test_groupby_nonobject_dtype(mframe, df_mixed_floats): - key = mframe.index.labels[0] + key = mframe.index.codes[0] grouped = mframe.groupby(key) result = grouped.sum() @@ -295,7 +295,7 @@ def test_indices_concatenation_order(): def f1(x): y = x[(x.b % 2) == 1] ** 2 if y.empty: - multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2, + multiindex = MultiIndex(levels=[[]] * 2, codes=[[]] * 2, names=['b', 'c']) res = DataFrame(None, columns=['a'], index=multiindex) return res @@ -314,7 +314,7 @@ def f2(x): def f3(x): y = x[(x.b % 2) == 1] ** 2 if y.empty: - multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2, + multiindex = MultiIndex(levels=[[]] * 2, codes=[[]] * 2, names=['foo', 'bar']) res = DataFrame(None, columns=['a', 'b'], index=multiindex) return res @@ -1416,11 +1416,11 @@ def test_groupby_sort_multiindex_series(): # _compress_group_index # GH 9444 index = MultiIndex(levels=[[1, 2], [1, 2]], - labels=[[0, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 0]], + codes=[[0, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 0]], names=['a', 'b']) mseries = Series([0, 1, 2, 3, 4, 5], index=index) index = MultiIndex(levels=[[1, 2], [1, 2]], - labels=[[0, 0, 1], [1, 0, 0]], names=['a', 'b']) + codes=[[0, 0, 1], [1, 0, 0]], names=['a', 'b']) mseries_result = Series([0, 2, 4], index=index) result = mseries.groupby(level=['a', 'b'], 
sort=False).first() diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index b6c20d31cddf3..bcf4f42d8ca5e 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -483,8 +483,8 @@ def test_groupby_level_index_names(self): def test_groupby_level_with_nas(self, sort): # GH 17537 index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], - labels=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1, - 2, 3]]) + codes=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1, + 2, 3]]) # factorizing doesn't confuse things s = Series(np.arange(8.), index=index) @@ -493,8 +493,8 @@ def test_groupby_level_with_nas(self, sort): assert_series_equal(result, expected) index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], - labels=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, - 1, 2, 3]]) + codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, + 1, 2, 3]]) # factorizing doesn't confuse things s = Series(np.arange(8.), index=index) diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index d5096ee99c8b0..e0f1730d6909f 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -107,8 +107,8 @@ def s_whitelist_fixture(request): def mframe(): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) return DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) @@ -195,8 +195,8 @@ def test_groupby_frame_whitelist(df_letters, df_whitelist_fixture): def raw_frame(): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) raw_frame = 
DataFrame(np.random.randn(10, 3), index=index, columns=Index(['A', 'B', 'C'], name='exp')) diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index 9fad4547648d5..7fb862c69f5b2 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -13,11 +13,11 @@ def idx(): major_axis = Index(['foo', 'bar', 'baz', 'qux']) minor_axis = Index(['one', 'two']) - major_labels = np.array([0, 0, 1, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) index_names = ['first', 'second'] mi = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], + codes=[major_codes, minor_codes], names=index_names, verify_integrity=False) return mi @@ -28,11 +28,11 @@ def idx_dup(): major_axis = Index(['foo', 'bar', 'baz', 'qux']) minor_axis = Index(['one', 'two']) - major_labels = np.array([0, 0, 1, 0, 1, 1]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) + major_codes = np.array([0, 0, 1, 0, 1, 1]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) index_names = ['first', 'second'] mi = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], + codes=[major_codes, minor_codes], names=index_names, verify_integrity=False) return mi diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 3b40b2afe9c6d..a1fb242979a11 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -32,11 +32,11 @@ def test_truncate(): major_axis = Index(lrange(4)) minor_axis = Index(lrange(2)) - major_labels = np.array([0, 0, 1, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) + codes=[major_codes, minor_codes]) 
result = index.truncate(before=1) assert 'foo' not in result.levels[0] @@ -282,13 +282,13 @@ def test_numpy_ufuncs(func): # parameters and fixtures at the same time. major_axis = Index(['foo', 'bar', 'baz', 'qux']) minor_axis = Index(['one', 'two']) - major_labels = np.array([0, 0, 1, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) index_names = ['first', 'second'] idx = MultiIndex( levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], + codes=[major_codes, minor_codes], names=index_names, verify_integrity=False ) @@ -307,13 +307,13 @@ def test_numpy_type_funcs(func): # parameters and fixtures at the same time. major_axis = Index(['foo', 'bar', 'baz', 'qux']) minor_axis = Index(['one', 'two']) - major_labels = np.array([0, 0, 1, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) index_names = ['first', 'second'] idx = MultiIndex( levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], + codes=[major_codes, minor_codes], names=index_names, verify_integrity=False ) diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py index 70d79ddfdc22e..cc7b48069b354 100644 --- a/pandas/tests/indexes/multi/test_astype.py +++ b/pandas/tests/indexes/multi/test_astype.py @@ -11,7 +11,7 @@ def test_astype(idx): expected = idx.copy() actual = idx.astype('O') assert_copy(actual.levels, expected.levels) - assert_copy(actual.labels, expected.labels) + assert_copy(actual.codes, expected.codes) assert [level.name for level in actual.levels] == list(expected.names) with pytest.raises(TypeError, match="^Setting.*dtype.*object"): diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index 23ea0c306d47c..f405fc659c709 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ 
b/pandas/tests/indexes/multi/test_compat.py @@ -62,10 +62,10 @@ def test_boolean_context_compat2(): def test_inplace_mutation_resets_values(): levels = [['a', 'b', 'c'], [4]] levels2 = [[1, 2, 3], ['a']] - labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] + codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] - mi1 = MultiIndex(levels=levels, labels=labels) - mi2 = MultiIndex(levels=levels2, labels=labels) + mi1 = MultiIndex(levels=levels, codes=codes) + mi2 = MultiIndex(levels=levels2, codes=codes) vals = mi1.values.copy() vals2 = mi2.values.copy() @@ -86,13 +86,13 @@ def test_inplace_mutation_resets_values(): tm.assert_almost_equal(mi1.values, vals2) # Make sure label setting works too - labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] + codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] exp_values = np.empty((6,), dtype=object) exp_values[:] = [(long(1), 'a')] * 6 # Must be 1d array of tuples assert exp_values.shape == (6,) - new_values = mi2.set_labels(labels2).values + new_values = mi2.set_codes(codes2).values # Not inplace shouldn't change tm.assert_almost_equal(mi2._tuples, vals2) @@ -101,7 +101,7 @@ def test_inplace_mutation_resets_values(): tm.assert_almost_equal(exp_values, new_values) # ...and again setting inplace should kill _tuples, etc - mi2.set_labels(labels2, inplace=True) + mi2.set_codes(codes2, inplace=True) tm.assert_almost_equal(mi2.values, new_values) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 4ad20e9d6ee81..d80395e513497 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -17,7 +17,7 @@ def test_constructor_single_level(): result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], names=['first']) + codes=[[0, 1, 2, 3]], names=['first']) assert isinstance(result, MultiIndex) expected = Index(['foo', 'bar', 'baz', 'qux'], name='first') tm.assert_index_equal(result.levels[0], expected) @@ 
-25,29 +25,29 @@ def test_constructor_single_level(): def test_constructor_no_levels(): - msg = "non-zero number of levels/labels" + msg = "non-zero number of levels/codes" with pytest.raises(ValueError, match=msg): - MultiIndex(levels=[], labels=[]) + MultiIndex(levels=[], codes=[]) - both_re = re.compile('Must pass both levels and labels') + both_re = re.compile('Must pass both levels and codes') with pytest.raises(TypeError, match=both_re): MultiIndex(levels=[]) with pytest.raises(TypeError, match=both_re): - MultiIndex(labels=[]) + MultiIndex(codes=[]) def test_constructor_nonhashable_names(): # GH 20527 levels = [[1, 2], [u'one', u'two']] - labels = [[0, 0, 1, 1], [0, 1, 0, 1]] + codes = [[0, 0, 1, 1], [0, 1, 0, 1]] names = (['foo'], ['bar']) message = "MultiIndex.name must be a hashable type" with pytest.raises(TypeError, match=message): - MultiIndex(levels=levels, labels=labels, names=names) + MultiIndex(levels=levels, codes=codes, names=names) # With .rename() mi = MultiIndex(levels=[[1, 2], [u'one', u'two']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=('foo', 'bar')) renamed = [['foor'], ['barr']] with pytest.raises(TypeError, match=message): @@ -58,50 +58,59 @@ def test_constructor_nonhashable_names(): mi.set_names(names=renamed) -def test_constructor_mismatched_label_levels(idx): - labels = [np.array([1]), np.array([2]), np.array([3])] +def test_constructor_mismatched_codes_levels(idx): + codes = [np.array([1]), np.array([2]), np.array([3])] levels = ["a"] - msg = "Length of levels and labels must be the same" + msg = "Length of levels and codes must be the same" with pytest.raises(ValueError, match=msg): - MultiIndex(levels=levels, labels=labels) + MultiIndex(levels=levels, codes=codes) length_error = re.compile('>= length of level') - label_error = re.compile(r'Unequal label lengths: \[4, 2\]') + label_error = re.compile(r'Unequal code lengths: \[4, 2\]') # important to check that it's looking at the right thing. 
with pytest.raises(ValueError, match=length_error): MultiIndex(levels=[['a'], ['b']], - labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) + codes=[[0, 1, 2, 3], [0, 3, 4, 1]]) with pytest.raises(ValueError, match=label_error): - MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) + MultiIndex(levels=[['a'], ['b']], codes=[[0, 0, 0, 0], [0, 0]]) # external API with pytest.raises(ValueError, match=length_error): idx.copy().set_levels([['a'], ['b']]) with pytest.raises(ValueError, match=label_error): - idx.copy().set_labels([[0, 0, 0, 0], [0, 0]]) + idx.copy().set_codes([[0, 0, 0, 0], [0, 0]]) + + +def test_labels_deprecated(idx): + # GH23752 + with tm.assert_produces_warning(FutureWarning): + MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], + labels=[[0, 1, 2, 3]], names=['first']) + with tm.assert_produces_warning(FutureWarning): + idx.labels def test_copy_in_constructor(): levels = np.array(["a", "b", "c"]) - labels = np.array([1, 1, 2, 0, 0, 1, 1]) - val = labels[0] - mi = MultiIndex(levels=[levels, levels], labels=[labels, labels], + codes = np.array([1, 1, 2, 0, 0, 1, 1]) + val = codes[0] + mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True) - assert mi.labels[0][0] == val - labels[0] = 15 - assert mi.labels[0][0] == val + assert mi.codes[0][0] == val + codes[0] = 15 + assert mi.codes[0][0] == val val = levels[0] levels[0] = "PANDA" assert mi.levels[0][0] == val def test_from_arrays(idx): - arrays = [np.asarray(lev).take(lab) - for lev, lab in zip(idx.levels, idx.labels)] + arrays = [np.asarray(lev).take(level_codes) + for lev, level_codes in zip(idx.levels, idx.codes)] # list of arrays as input result = MultiIndex.from_arrays(arrays, names=idx.names) @@ -116,8 +125,8 @@ def test_from_arrays(idx): def test_from_arrays_iterator(idx): # GH 18434 - arrays = [np.asarray(lev).take(lab) - for lev, lab in zip(idx.levels, idx.labels)] + arrays = [np.asarray(lev).take(level_codes) + for lev, level_codes in zip(idx.levels, idx.codes)] # iterator as 
input result = MultiIndex.from_arrays(iter(arrays), names=idx.names) @@ -220,7 +229,7 @@ def test_from_arrays_index_series_categorical(): def test_from_arrays_empty(): # 0 levels - msg = "Must pass non-zero number of levels/labels" + msg = "Must pass non-zero number of levels/codes" with pytest.raises(ValueError, match=msg): MultiIndex.from_arrays(arrays=[]) @@ -235,7 +244,7 @@ def test_from_arrays_empty(): arrays = [[]] * N names = list('ABC')[:N] result = MultiIndex.from_arrays(arrays=arrays, names=names) - expected = MultiIndex(levels=[[]] * N, labels=[[]] * N, + expected = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=names) tm.assert_index_equal(result, expected) @@ -275,7 +284,7 @@ def test_from_tuples(): MultiIndex.from_tuples([]) expected = MultiIndex(levels=[[1, 3], [2, 4]], - labels=[[0, 1], [0, 1]], + codes=[[0, 1], [0, 1]], names=['a', 'b']) # input tuples @@ -287,7 +296,7 @@ def test_from_tuples_iterator(): # GH 18434 # input iterator for tuples expected = MultiIndex(levels=[[1, 3], [2, 4]], - labels=[[0, 1], [0, 1]], + codes=[[0, 1], [0, 1]], names=['a', 'b']) result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=['a', 'b']) @@ -314,7 +323,7 @@ def test_from_tuples_index_values(idx): def test_from_product_empty_zero_levels(): # 0 levels - msg = "Must pass non-zero number of levels/labels" + msg = "Must pass non-zero number of levels/codes" with pytest.raises(ValueError, match=msg): MultiIndex.from_product([]) @@ -334,7 +343,7 @@ def test_from_product_empty_two_levels(first, second): names = ['A', 'B'] result = MultiIndex.from_product([first, second], names=names) expected = MultiIndex(levels=[first, second], - labels=[[], []], names=names) + codes=[[], []], names=names) tm.assert_index_equal(result, expected) @@ -345,7 +354,7 @@ def test_from_product_empty_three_levels(N): lvl2 = lrange(N) result = MultiIndex.from_product([[], lvl2, []], names=names) expected = MultiIndex(levels=[[], lvl2, []], - labels=[[], [], []], names=names) + codes=[[], 
[], []], names=names) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_contains.py b/pandas/tests/indexes/multi/test_contains.py index deff6aacf8f9c..b73ff11a4dd4e 100644 --- a/pandas/tests/indexes/multi/test_contains.py +++ b/pandas/tests/indexes/multi/test_contains.py @@ -20,7 +20,7 @@ def test_contains_with_nat(): # MI with a NaT mi = MultiIndex(levels=[['C'], pd.date_range('2012-01-01', periods=5)], - labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], + codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], names=[None, 'B']) assert ('C', pd.Timestamp('2012-01-01')) in mi for val in mi.values: diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index fb734b016518e..b72fadfeeab72 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -90,8 +90,8 @@ def test_to_hierarchical(): check_stacklevel=False): result = index.to_hierarchical(3) expected = MultiIndex(levels=[[1, 2], ['one', 'two']], - labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) + codes=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) tm.assert_index_equal(result, expected) assert result.names == index.names @@ -100,8 +100,8 @@ def test_to_hierarchical(): check_stacklevel=False): result = index.to_hierarchical(3, 2) expected = MultiIndex(levels=[[1, 2], ['one', 'two']], - labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], - [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]) + codes=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]) tm.assert_index_equal(result, expected) assert result.names == index.names
diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 0d09e3ef2e4b1..aaf2fe1cb635f 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -11,11 +11,11 @@ def assert_multiindex_copied(copy, original): # Levels should be (at least, shallow copied) tm.assert_copy(copy.levels, original.levels) - tm.assert_almost_equal(copy.labels, original.labels) + tm.assert_almost_equal(copy.codes, original.codes) # Labels doesn't matter which way copied - tm.assert_almost_equal(copy.labels, original.labels) - assert copy.labels is not original.labels + tm.assert_almost_equal(copy.codes, original.codes) + assert copy.codes is not original.codes # Names doesn't matter which way copied assert copy.names == original.names @@ -37,6 +37,12 @@ def test_shallow_copy(idx): assert_multiindex_copied(i_copy, idx) +def test_labels_deprecated(idx): + # GH23752 + with tm.assert_produces_warning(FutureWarning): + idx.copy(labels=idx.codes) + + def test_view(idx): i_view = idx.view() assert_multiindex_copied(i_view, idx) @@ -47,7 +53,7 @@ def test_copy_and_deepcopy(func): idx = MultiIndex( levels=[['foo', 'bar'], ['fizz', 'buzz']], - labels=[[0, 0, 0, 1], [0, 0, 1, 1]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], names=['first', 'second'] ) idx_copy = func(idx) @@ -59,7 +65,7 @@ def test_copy_and_deepcopy(func): def test_copy_method(deep): idx = MultiIndex( levels=[['foo', 'bar'], ['fizz', 'buzz']], - labels=[[0, 0, 0, 1], [0, 0, 1, 1]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], names=['first', 'second'] ) idx_copy = idx.copy(deep=deep) @@ -70,16 +76,16 @@ @pytest.mark.parametrize('kwarg, value', [ ('names', ['thrid', 'fourth']), ('levels', [['foo2', 'bar2'], ['fizz2', 'buzz2']]), - ('labels', [[1, 0, 0, 0], [1, 1, 0, 0]]) + ('codes', [[1, 0, 0, 0], [1, 1, 0, 0]]) ]) def test_copy_method_kwargs(deep, kwarg, value): # gh-12309: Check that the "name" argument as
well other kwargs are honored idx = MultiIndex( levels=[['foo', 'bar'], ['fizz', 'buzz']], - labels=[[0, 0, 0, 1], [0, 0, 1, 1]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], names=['first', 'second'] ) idx_copy = idx.copy(**{kwarg: value, 'deep': deep}) if kwarg == 'names': assert getattr(idx_copy, kwarg) == value diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index a692b510c569c..66edd5b5343f4 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -71,7 +71,7 @@ def test_droplevel_with_names(idx): index = MultiIndex( levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], names=['one', 'two', 'three']) dropped = index.droplevel(0) @@ -85,7 +85,7 @@ def test_droplevel_with_names(idx): def test_droplevel_list(): index = MultiIndex( levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], names=['one', 'two', 'three']) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 4336d891adcdc..e75e6c7e83891 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -82,7 +82,7 @@ def test_get_unique_index(idx, dropna): tm.assert_index_equal(result, expected) -def test_duplicate_multiindex_labels(): +def test_duplicate_multiindex_codes(): # GH 17464 # Make sure that a MultiIndex with duplicate levels throws a ValueError with pytest.raises(ValueError): @@ -118,8 +118,8 @@ def test_duplicate_meta_data(): # GH 10115 mi = MultiIndex( levels=[[0, 1], [0, 1, 2]], - labels=[[0, 0, 0, 0, 1, 1, 1], - [0, 1, 2, 0, 0,
1, 2]]) + codes=[[0, 0, 0, 0, 1, 1, 1], + [0, 1, 2, 0, 0, 1, 2]]) for idx in [mi, mi.set_names([None, None]), @@ -137,8 +137,8 @@ def test_has_duplicates(idx, idx_dup): assert idx_dup.has_duplicates is True mi = MultiIndex(levels=[[0, 1], [0, 1, 2]], - labels=[[0, 0, 0, 0, 1, 1, 1], - [0, 1, 2, 0, 0, 1, 2]]) + codes=[[0, 0, 0, 0, 1, 1, 1], + [0, 1, 2, 0, 0, 1, 2]]) assert mi.is_unique is False assert mi.has_duplicates is True @@ -171,31 +171,31 @@ def test_has_duplicates_from_tuples(): def test_has_duplicates_overflow(): # handle int64 overflow if possible def check(nlevels, with_nulls): - labels = np.tile(np.arange(500), 2) + codes = np.tile(np.arange(500), 2) level = np.arange(500) if with_nulls: # inject some null values - labels[500] = -1 # common nan value - labels = [labels.copy() for i in range(nlevels)] + codes[500] = -1 # common nan value + codes = [codes.copy() for i in range(nlevels)] for i in range(nlevels): - labels[i][500 + i - nlevels // 2] = -1 + codes[i][500 + i - nlevels // 2] = -1 - labels += [np.array([-1, 1]).repeat(500)] + codes += [np.array([-1, 1]).repeat(500)] else: - labels = [labels] * nlevels + [np.arange(2).repeat(500)] + codes = [codes] * nlevels + [np.arange(2).repeat(500)] levels = [level] * nlevels + [[0, 1]] # no dups - mi = MultiIndex(levels=levels, labels=labels) + mi = MultiIndex(levels=levels, codes=codes) assert not mi.has_duplicates # with a dup if with_nulls: def f(a): return np.insert(a, 1000, a[0]) - labels = list(map(f, labels)) - mi = MultiIndex(levels=levels, labels=labels) + codes = list(map(f, codes)) + mi = MultiIndex(levels=levels, codes=codes) else: values = mi.values.tolist() mi = MultiIndex.from_tuples(values + [values[0]]) @@ -226,8 +226,8 @@ def test_duplicated_large(keep): # GH 9125 n, k = 200, 5000 levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] - labels = [np.random.choice(n, k * n) for lev in levels] - mi = MultiIndex(levels=levels, labels=labels) + codes = [np.random.choice(n, k * n) 
for lev in levels] + mi = MultiIndex(levels=levels, codes=codes) result = mi.duplicated(keep=keep) expected = hashtable.duplicated_object(mi.values, keep=keep) @@ -250,9 +250,9 @@ def test_get_duplicates(): for n in range(1, 6): # 1st level shape for m in range(1, 5): # 2nd level shape # all possible unique combinations, including nan - lab = product(range(-1, n), range(-1, m)) + codes = product(range(-1, n), range(-1, m)) mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], - labels=np.random.permutation(list(lab)).T) + codes=np.random.permutation(list(codes)).T) assert len(mi) == (n + 1) * (m + 1) assert not mi.has_duplicates diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index bd1f313897ea2..6a9eb662dd9d4 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -99,10 +99,10 @@ def test_equals_multi(idx): # different number of levels index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + lrange(4))], codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) + index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1]) assert not index.equals(index2) assert not index.equal_levels(index2) @@ -110,11 +110,11 @@ def test_equals_multi(idx): major_axis = Index(lrange(4)) minor_axis = Index(lrange(2)) - major_labels = np.array([0, 0, 1, 2, 2, 3]) - minor_labels = np.array([0, 1, 0, 0, 1, 0]) + major_codes = np.array([0, 0, 1, 2, 2, 3]) + minor_codes = np.array([0, 1, 0, 0, 1, 0]) index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) + codes=[major_codes, minor_codes]) assert not idx.equals(index) assert not idx.equal_levels(index) @@ -122,11 +122,11 @@ def test_equals_multi(idx): 
major_axis = Index(['foo', 'bar', 'baz', 'qux']) minor_axis = Index(['one', 'two']) - major_labels = np.array([0, 0, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) + major_codes = np.array([0, 0, 2, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) + codes=[major_codes, minor_codes]) assert not idx.equals(index) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 63936a74b6b8c..8a65a930a8ce5 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -3,6 +3,8 @@ import warnings +import pytest + import pandas as pd import pandas.util.testing as tm from pandas import MultiIndex, compat @@ -22,7 +24,7 @@ def test_format(idx): def test_format_integer_names(): index = MultiIndex(levels=[[0, 1], [0, 1]], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]) + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]) index.format(names=True) @@ -43,8 +45,8 @@ def test_format_sparse_config(idx): def test_format_sparse_display(): index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]], - labels=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1], - [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) + codes=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1], + [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) result = index.format() assert result[3] == '1 0 0 0' @@ -57,6 +59,7 @@ def test_repr_with_unicode_data(): assert "\\u" not in repr(index) # we don't want unicode-escaped +@pytest.mark.skip(reason="#22511 will remove this test") def test_repr_roundtrip(): mi = MultiIndex.from_product([list('ab'), range(3)], diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index a5f586bd98d5f..d201cb2eb178b 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -45,8 +45,8 @@ def test_get_level_values(idx): index = MultiIndex( 
levels=[CategoricalIndex(['A', 'B']), CategoricalIndex([1, 2, 3])], - labels=[np.array([0, 0, 0, 1, 1, 1]), - np.array([0, 1, 2, 0, 1, 2])]) + codes=[np.array([0, 0, 0, 1, 1, 1]), + np.array([0, 1, 2, 0, 1, 2])]) exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']) tm.assert_index_equal(index.get_level_values(0), exp) @@ -57,8 +57,8 @@ def test_get_level_values(idx): def test_get_value_duplicates(): index = MultiIndex(levels=[['D', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], names=['tag', 'day']) assert index.get_loc('D') == slice(0, 3) @@ -151,27 +151,27 @@ def test_set_name_methods(idx, index_names): assert ind.names == new_names2 -def test_set_levels_labels_directly(idx): - # setting levels/labels directly raises AttributeError +def test_set_levels_codes_directly(idx): + # setting levels/codes directly raises AttributeError levels = idx.levels new_levels = [[lev + 'a' for lev in level] for level in levels] - labels = idx.labels - major_labels, minor_labels = labels - major_labels = [(x + 1) % 3 for x in major_labels] - minor_labels = [(x + 1) % 1 for x in minor_labels] - new_labels = [major_labels, minor_labels] + codes = idx.codes + major_codes, minor_codes = codes + major_codes = [(x + 1) % 3 for x in major_codes] + minor_codes = [(x + 1) % 1 for x in minor_codes] + new_codes = [major_codes, minor_codes] with pytest.raises(AttributeError): idx.levels = new_levels with pytest.raises(AttributeError): - idx.labels = new_labels + idx.codes = new_codes def test_set_levels(idx): - # side note - you probably wouldn't want to use levels and labels + # side note - you probably wouldn't want to use levels and codes # directly like this - but it is possible. 
levels = idx.levels new_levels = [[lev + 'a' for lev in level] for level in levels] @@ -232,9 +232,9 @@ def test_set_levels(idx): check_dtype=True) with pytest.raises(ValueError, match="^On"): - idx.set_labels([0, 1, 2, 3, 4, 5], level=0, - inplace=inplace) - assert_matching(idx.labels, original_index.labels, + idx.set_codes([0, 1, 2, 3, 4, 5], level=0, + inplace=inplace) + assert_matching(idx.codes, original_index.codes, check_dtype=True) with pytest.raises(TypeError, match="^Levels"): @@ -242,92 +242,114 @@ def test_set_levels(idx): assert_matching(idx.levels, original_index.levels, check_dtype=True) - with pytest.raises(TypeError, match="^Labels"): - idx.set_labels(1, level=0, inplace=inplace) - assert_matching(idx.labels, original_index.labels, + with pytest.raises(TypeError, match="^Codes"): + idx.set_codes(1, level=0, inplace=inplace) + assert_matching(idx.codes, original_index.codes, check_dtype=True) -def test_set_labels(idx): - # side note - you probably wouldn't want to use levels and labels +def test_set_codes(idx): + # side note - you probably wouldn't want to use levels and codes # directly like this - but it is possible. 
- labels = idx.labels - major_labels, minor_labels = labels - major_labels = [(x + 1) % 3 for x in major_labels] - minor_labels = [(x + 1) % 1 for x in minor_labels] - new_labels = [major_labels, minor_labels] - - # label changing [w/o mutation] - ind2 = idx.set_labels(new_labels) - assert_matching(ind2.labels, new_labels) - assert_matching(idx.labels, labels) - - # label changing [w/ mutation] + codes = idx.codes + major_codes, minor_codes = codes + major_codes = [(x + 1) % 3 for x in major_codes] + minor_codes = [(x + 1) % 1 for x in minor_codes] + new_codes = [major_codes, minor_codes] + + # changing codes w/o mutation + ind2 = idx.set_codes(new_codes) + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) + + # changing label w/ mutation ind2 = idx.copy() - inplace_return = ind2.set_labels(new_labels, inplace=True) + inplace_return = ind2.set_codes(new_codes, inplace=True) assert inplace_return is None - assert_matching(ind2.labels, new_labels) + assert_matching(ind2.codes, new_codes) - # label changing specific level [w/o mutation] - ind2 = idx.set_labels(new_labels[0], level=0) - assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(idx.labels, labels) + # codes changing specific level w/o mutation + ind2 = idx.set_codes(new_codes[0], level=0) + assert_matching(ind2.codes, [new_codes[0], codes[1]]) + assert_matching(idx.codes, codes) - ind2 = idx.set_labels(new_labels[1], level=1) - assert_matching(ind2.labels, [labels[0], new_labels[1]]) - assert_matching(idx.labels, labels) + ind2 = idx.set_codes(new_codes[1], level=1) + assert_matching(ind2.codes, [codes[0], new_codes[1]]) + assert_matching(idx.codes, codes) - # label changing multiple levels [w/o mutation] - ind2 = idx.set_labels(new_labels, level=[0, 1]) - assert_matching(ind2.labels, new_labels) - assert_matching(idx.labels, labels) + # codes changing multiple levels w/o mutation + ind2 = idx.set_codes(new_codes, level=[0, 1]) + assert_matching(ind2.codes, 
new_codes) + assert_matching(idx.codes, codes) - # label changing specific level [w/ mutation] + # label changing specific level w/ mutation ind2 = idx.copy() - inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) + inplace_return = ind2.set_codes(new_codes[0], level=0, inplace=True) assert inplace_return is None - assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(idx.labels, labels) + assert_matching(ind2.codes, [new_codes[0], codes[1]]) + assert_matching(idx.codes, codes) ind2 = idx.copy() - inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) + inplace_return = ind2.set_codes(new_codes[1], level=1, inplace=True) assert inplace_return is None - assert_matching(ind2.labels, [labels[0], new_labels[1]]) - assert_matching(idx.labels, labels) + assert_matching(ind2.codes, [codes[0], new_codes[1]]) + assert_matching(idx.codes, codes) - # label changing multiple levels [w/ mutation] + # codes changing multiple levels [w/ mutation] ind2 = idx.copy() - inplace_return = ind2.set_labels(new_labels, level=[0, 1], - inplace=True) + inplace_return = ind2.set_codes(new_codes, level=[0, 1], + inplace=True) assert inplace_return is None - assert_matching(ind2.labels, new_labels) - assert_matching(idx.labels, labels) + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) # label changing for levels of different magnitude of categories ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) + new_codes = range(129, -1, -1) + expected = pd.MultiIndex.from_tuples( + [(0, i) for i in new_codes]) + + # [w/o mutation] + result = ind.set_codes(codes=new_codes, level=1) + assert result.equals(expected) + + # [w/ mutation] + result = ind.copy() + result.set_codes(codes=new_codes, level=1, inplace=True) + assert result.equals(expected) + + with tm.assert_produces_warning(FutureWarning): + ind.set_codes(labels=new_codes, level=1) + + +def test_set_labels_deprecated(): + # GH23752 + ind = 
pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) new_labels = range(129, -1, -1) expected = pd.MultiIndex.from_tuples( [(0, i) for i in new_labels]) # [w/o mutation] - result = ind.set_labels(labels=new_labels, level=1) + with tm.assert_produces_warning(FutureWarning): + result = ind.set_labels(labels=new_labels, level=1) assert result.equals(expected) # [w/ mutation] result = ind.copy() - result.set_labels(labels=new_labels, level=1, inplace=True) + with tm.assert_produces_warning(FutureWarning): + result.set_labels(labels=new_labels, level=1, inplace=True) assert result.equals(expected) -def test_set_levels_labels_names_bad_input(idx): - levels, labels = idx.levels, idx.labels +def test_set_levels_codes_names_bad_input(idx): + levels, codes = idx.levels, idx.codes names = idx.names with pytest.raises(ValueError, match='Length of levels'): idx.set_levels([levels[0]]) - with pytest.raises(ValueError, match='Length of labels'): - idx.set_labels([labels[0]]) + with pytest.raises(ValueError, match='Length of codes'): + idx.set_codes([codes[0]]) with pytest.raises(ValueError, match='Length of names'): idx.set_names([names[0]]) @@ -338,7 +360,7 @@ def test_set_levels_labels_names_bad_input(idx): # shouldn't scalar data error, instead should demand list-like with pytest.raises(TypeError, match='list of lists-like'): - idx.set_labels(labels[0]) + idx.set_codes(codes[0]) # shouldn't scalar data error, instead should demand list-like with pytest.raises(TypeError, match='list-like'): @@ -353,10 +375,10 @@ def test_set_levels_labels_names_bad_input(idx): # should have equal lengths with pytest.raises(TypeError, match='list of lists-like'): - idx.set_labels(labels[0], level=[0, 1]) + idx.set_codes(codes[0], level=[0, 1]) with pytest.raises(TypeError, match='list-like'): - idx.set_labels(labels, level=0) + idx.set_codes(codes, level=0) # should have equal lengths with pytest.raises(ValueError, match='Length of names'): @@ -372,7 +394,7 @@ def 
test_set_names_with_nlevel_1(inplace): # Ensure that .set_names for MultiIndex with # nlevels == 1 does not raise any errors expected = pd.MultiIndex(levels=[[0, 1]], - labels=[[0, 1]], + codes=[[0, 1]], names=['first']) m = pd.MultiIndex.from_product([[0, 1]]) result = m.set_names('first', level=0, inplace=inplace) @@ -391,7 +413,7 @@ def test_set_levels_categorical(ordered): cidx = CategoricalIndex(list("bac"), ordered=ordered) result = index.set_levels(cidx, 0) expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], - labels=index.labels) + codes=index.codes) tm.assert_index_equal(result, expected) result_lvl = result.get_level_values(0) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 23f48db751804..c40ecd9e82a07 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -70,7 +70,7 @@ def test_slice_locs_with_type_mismatch(): def test_slice_locs_not_sorted(): index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + lrange(4))], codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) msg = "[Kk]ey length.*greater than MultiIndex lexsort depth" @@ -87,8 +87,8 @@ def test_slice_locs_not_contained(): # some searchsorted action index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]], - labels=[[0, 0, 0, 1, 1, 2, 3, 3, 3], - [0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0) + codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], + [0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0) result = index.slice_locs((1, 0), (5, 2)) assert result == (3, 6) @@ -126,11 +126,11 @@ def test_get_indexer(): major_axis = Index(lrange(4)) minor_axis = Index(lrange(2)) - major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) - minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) + major_codes = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) + minor_codes = 
np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) + codes=[major_codes, minor_codes]) idx1 = index[:5] idx2 = index[[1, 3, 5]] @@ -247,7 +247,7 @@ def test_getitem_bool_index_single(ind1, ind2): expected = pd.MultiIndex(levels=[np.array([], dtype=np.int64), np.array([], dtype=np.int64)], - labels=[[], []]) + codes=[[], []]) tm.assert_index_equal(idx[ind2], expected) @@ -262,7 +262,7 @@ def test_get_loc(idx): # 3 levels index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + lrange(4))], codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) pytest.raises(KeyError, index.get_loc, (1, 1)) assert index.get_loc((2, 0)) == slice(3, 5) @@ -283,7 +283,7 @@ def test_get_loc_duplicates(): def test_get_loc_level(): index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + lrange(4))], codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) loc, new_index = index.get_loc_level((0, 1)) @@ -303,7 +303,7 @@ def test_get_loc_level(): # Unused label on unsorted level: pytest.raises(KeyError, index.drop(1, level=2).get_loc_level, 2, 2) - index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( + index = MultiIndex(levels=[[2000], lrange(4)], codes=[np.array( [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) result, new_index = index.get_loc_level((2000, slice(None, None))) expected = slice(None, None) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 2ec08fa89d133..b0a7da9e41958 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -16,19 +16,19 @@ def test_labels_dtypes(): # GH 8456 i = 
MultiIndex.from_tuples([('A', 1), ('A', 2)]) - assert i.labels[0].dtype == 'int8' - assert i.labels[1].dtype == 'int8' + assert i.codes[0].dtype == 'int8' + assert i.codes[1].dtype == 'int8' i = MultiIndex.from_product([['a'], range(40)]) - assert i.labels[1].dtype == 'int8' + assert i.codes[1].dtype == 'int8' i = MultiIndex.from_product([['a'], range(400)]) - assert i.labels[1].dtype == 'int16' + assert i.codes[1].dtype == 'int16' i = MultiIndex.from_product([['a'], range(40000)]) - assert i.labels[1].dtype == 'int32' + assert i.codes[1].dtype == 'int32' i = pd.MultiIndex.from_product([['a'], range(1000)]) - assert (i.labels[0] >= 0).all() - assert (i.labels[1] >= 0).all() + assert (i.codes[0] >= 0).all() + assert (i.codes[1] >= 0).all() def test_values_boxed(): @@ -98,18 +98,18 @@ def test_consistency(): major_axis = lrange(70000) minor_axis = lrange(10) - major_labels = np.arange(70000) - minor_labels = np.repeat(lrange(10), 7000) + major_codes = np.arange(70000) + minor_codes = np.repeat(lrange(10), 7000) # the fact that is works means it's consistent index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) + codes=[major_codes, minor_codes]) # inconsistent - major_labels = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) + major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) + codes=[major_codes, minor_codes]) assert index.is_unique is False @@ -194,7 +194,7 @@ def test_can_hold_identifiers(idx): def test_metadata_immutable(idx): - levels, labels = idx.levels, idx.labels + levels, codes = idx.levels, idx.codes # shouldn't be able to set at either the top level or base level mutable_regex = re.compile('does not support mutable operations') with pytest.raises(TypeError, match=mutable_regex): @@ -203,9 +203,9 @@ def test_metadata_immutable(idx): 
levels[0][0] = levels[0][0] # ditto for labels with pytest.raises(TypeError, match=mutable_regex): - labels[0] = labels[0] + codes[0] = codes[0] with pytest.raises(TypeError, match=mutable_regex): - labels[0][0] = labels[0][0] + codes[0][0] = codes[0][0] # and for names names = idx.names with pytest.raises(TypeError, match=mutable_regex): diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py index 8ce33f100a6af..a5838ae9cac4d 100644 --- a/pandas/tests/indexes/multi/test_missing.py +++ b/pandas/tests/indexes/multi/test_missing.py @@ -110,10 +110,10 @@ def test_nan_stays_float(): # GH 7031 idx0 = pd.MultiIndex(levels=[["A", "B"], []], - labels=[[1, 0], [-1, -1]], + codes=[[1, 0], [-1, -1]], names=[0, 1]) idx1 = pd.MultiIndex(levels=[["C"], ["D"]], - labels=[[0], [0]], + codes=[[0], [0]], names=[0, 1]) idxm = idx0.join(idx1, how='outer') assert pd.isna(idx0.get_level_values(1)).all() diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py index a854035b37544..3c7db70b7e242 100644 --- a/pandas/tests/indexes/multi/test_monotonic.py +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -39,8 +39,8 @@ def test_is_monotonic_increasing(): # string ordering i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) assert i.is_monotonic is False assert Index(i.values).is_monotonic is False @@ -49,8 +49,8 @@ def test_is_monotonic_increasing(): i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['mom', 'next', 'zenith']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) assert i.is_monotonic is True assert Index(i.values).is_monotonic is True @@ -62,7 
+62,7 @@ def test_is_monotonic_increasing(): levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237', 'nl0000289783', 'nl0000289965', 'nl0000301109']], - labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], names=['household_id', 'asset_id']) assert i.is_monotonic is False @@ -109,8 +109,8 @@ def test_is_monotonic_decreasing(): # string ordering i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'], ['three', 'two', 'one']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) assert i.is_monotonic_decreasing is False assert Index(i.values).is_monotonic_decreasing is False @@ -119,8 +119,8 @@ def test_is_monotonic_decreasing(): i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'], ['zenith', 'next', 'mom']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) assert i.is_monotonic_decreasing is True assert Index(i.values).is_monotonic_decreasing is True @@ -132,7 +132,7 @@ def test_is_monotonic_decreasing(): levels=[[4, 3, 2, 1], ['nl0000301109', 'nl0000289965', 'nl0000289783', 'lu0197800237', 'gb00b03mlx29']], - labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], names=['household_id', 'asset_id']) assert i.is_monotonic_decreasing is False @@ -148,14 +148,14 @@ def test_is_monotonic_decreasing(): def test_is_strictly_monotonic_increasing(): idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']], - labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) + codes=[[0, 0, 1, 1], [0, 0, 0, 1]]) assert idx.is_monotonic_increasing is True assert idx._is_strictly_monotonic_increasing is False def test_is_strictly_monotonic_decreasing(): idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']], - labels=[[0, 0, 1, 1], 
[0, 0, 0, 1]]) + codes=[[0, 0, 1, 1], [0, 0, 0, 1]]) assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is False diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py index 1f63f1ef100c1..b79d341030687 100644 --- a/pandas/tests/indexes/multi/test_names.py +++ b/pandas/tests/indexes/multi/test_names.py @@ -99,14 +99,14 @@ def test_names(idx, index_names): # initializing with bad names (should always be equivalent) major_axis, minor_axis = idx.levels - major_labels, minor_labels = idx.labels + major_codes, minor_codes = idx.codes with pytest.raises(ValueError, match="^Length of names"): MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], + codes=[major_codes, minor_codes], names=['first']) with pytest.raises(ValueError, match="^Length of names"): MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], + codes=[major_codes, minor_codes], names=['first', 'second', 'third']) # names are assigned diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 7ad9b43e4c723..5ff97743be444 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -172,7 +172,7 @@ def test_reconstruct_sort(): # cannot convert to lexsorted mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], - labels=[[0, 1, 0, 2], [2, 0, 0, 1]], + codes=[[0, 1, 0, 2], [2, 0, 0, 1]], names=['col1', 'col2']) assert not mi.is_lexsorted() assert not mi.is_monotonic @@ -197,14 +197,14 @@ def test_reconstruct_remove_unused(): # removed levels are there expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'], [1, 2, 3]], - labels=[[1, 2], [1, 2]], + codes=[[1, 2], [1, 2]], names=['first', 'second']) result = df2.index tm.assert_index_equal(result, expected) expected = MultiIndex(levels=[['keepMe', 'keepMeToo'], [2, 3]], - labels=[[0, 1], [0, 1]], + codes=[[0, 1], [0, 1]], 
names=['first', 'second']) result = df2.index.remove_unused_levels() tm.assert_index_equal(result, expected) @@ -251,7 +251,7 @@ def test_remove_unused_levels_large(first_type, second_type): def test_remove_unused_nan(level0, level1): # GH 18417 mi = pd.MultiIndex(levels=[level0, level1], - labels=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) + codes=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) result = mi.remove_unused_levels() tm.assert_index_equal(result, mi) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index fe7391ff15ebe..2580a47e8fdd3 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -448,7 +448,7 @@ def test_constructor_empty(self, value, klass): (PeriodIndex((x for x in []), freq='B'), PeriodIndex), (RangeIndex(step=1), pd.RangeIndex), (MultiIndex(levels=[[1, 2], ['blue', 'red']], - labels=[[], []]), MultiIndex) + codes=[[], []]), MultiIndex) ]) def test_constructor_empty_special(self, empty, klass): assert isinstance(empty, klass) diff --git a/pandas/tests/indexing/multiindex/conftest.py b/pandas/tests/indexing/multiindex/conftest.py index f578fe7c0f60f..046fc19c0d9c8 100644 --- a/pandas/tests/indexing/multiindex/conftest.py +++ b/pandas/tests/indexing/multiindex/conftest.py @@ -10,8 +10,8 @@ def multiindex_dataframe_random_data(): """DataFrame with 2 level MultiIndex with random data""" index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) return DataFrame(np.random.randn(10, 3), index=index, columns=Index(['A', 'B', 'C'], name='exp')) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index efc1ebcbecee7..00b30bab37441 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ 
b/pandas/tests/indexing/multiindex/test_getitem.py @@ -64,8 +64,8 @@ def test_getitem_duplicates_multiindex(self): index = MultiIndex(levels=[['D', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], names=['tag', 'day']) arr = np.random.randn(len(index), 1) df = DataFrame(arr, index=index, columns=['val']) @@ -87,8 +87,8 @@ def f(): # A is treated as a special Timestamp index = MultiIndex(levels=[['A', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], names=['tag', 'day']) df = DataFrame(arr, index=index, columns=['val']) result = df.val['A'] @@ -264,8 +264,8 @@ def test_getitem_toplevel(self, multiindex_dataframe_random_data): def test_getitem_int(self, multiindex_dataframe_random_data): levels = [[0, 1], [0, 1, 2]] - labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] - index = MultiIndex(levels=levels, labels=labels) + codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + index = MultiIndex(levels=levels, codes=codes) frame = DataFrame(np.random.randn(6, 2), index=index) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index f31685641753e..47a46bc05d0d9 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -11,7 +11,7 @@ def single_level_multiindex(): """single level MultiIndex""" return MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], names=['first']) + codes=[[0, 1, 2, 3]], names=['first']) @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") @@ -40,7 +40,7 @@ def test_loc_getitem_series(self): empty = Series(data=[], dtype=np.float64) expected = Series([], index=MultiIndex( - levels=index.levels, labels=[[], []], 
dtype=np.float64)) + levels=index.levels, codes=[[], []], dtype=np.float64)) result = x.loc[empty] tm.assert_series_equal(result, expected) @@ -60,7 +60,7 @@ def test_loc_getitem_array(self): # empty array: empty = np.array([]) expected = Series([], index=MultiIndex( - levels=index.levels, labels=[[], []], dtype=np.float64)) + levels=index.levels, codes=[[], []], dtype=np.float64)) result = x.loc[empty] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index dc2bd4d36e9fb..2e37ebe4a0629 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -52,9 +52,9 @@ def test_xs_partial(self, multiindex_dataframe_random_data, # ex from #1796 index = MultiIndex(levels=[['foo', 'bar'], ['one', 'two'], [-1, 1]], - labels=[[0, 0, 0, 0, 1, 1, 1, 1], - [0, 0, 1, 1, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1, - 0, 1]]) + codes=[[0, 0, 0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1, + 0, 1]]) df = DataFrame(np.random.randn(8, 4), index=index, columns=list('abcd')) @@ -68,7 +68,7 @@ def test_getitem_partial( ymd = ymd.T result = ymd[2000, 2] - expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1]) + expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1]) expected.columns = expected.columns.droplevel(0).droplevel(0) tm.assert_frame_equal(result, expected) @@ -82,12 +82,12 @@ def test_fancy_slice_partial( ymd = multiindex_year_month_day_dataframe_random_data result = ymd.loc[(2000, 2):(2000, 4)] - lev = ymd.index.labels[1] + lev = ymd.index.codes[1] expected = ymd[(lev >= 1) & (lev <= 3)] tm.assert_frame_equal(result, expected) def test_getitem_partial_column_select(self): - idx = MultiIndex(labels=[[0, 0, 0], [0, 1, 1], [1, 0, 1]], + idx = MultiIndex(codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]], levels=[['a', 'b'], ['x', 'y'], ['p', 'q']]) df = DataFrame(np.random.rand(3, 2), index=idx) 
diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 7288983f5f04b..bc00481ddfd90 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -277,8 +277,8 @@ def test_frame_getitem_setitem_boolean( def test_frame_getitem_setitem_multislice(self): levels = [['t1', 't2'], ['a', 'b', 'c']] - labels = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]] - midx = MultiIndex(labels=labels, levels=levels, names=[None, 'id']) + codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]] + midx = MultiIndex(codes=codes, levels=levels, names=[None, 'id']) df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx) result = df.loc[:, 'value'] @@ -350,7 +350,7 @@ def test_getitem_setitem_tuple_plus_columns( def test_getitem_setitem_slice_integers(self): index = MultiIndex(levels=[[0, 1, 2], [0, 2]], - labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) frame = DataFrame(np.random.randn(len(index), 4), index=index, columns=['a', 'b', 'c', 'd']) diff --git a/pandas/tests/indexing/multiindex/test_sorted.py b/pandas/tests/indexing/multiindex/test_sorted.py index 898959d74383a..f565c30fc3e2c 100644 --- a/pandas/tests/indexing/multiindex/test_sorted.py +++ b/pandas/tests/indexing/multiindex/test_sorted.py @@ -39,7 +39,7 @@ def test_frame_getitem_not_sorted2(self): df2_original = df2.copy() df2.index.set_levels(['b', 'd', 'a'], level='col1', inplace=True) - df2.index.set_labels([0, 1, 0, 2], level='col1', inplace=True) + df2.index.set_codes([0, 1, 0, 2], level='col1', inplace=True) assert not df2.index.is_lexsorted() assert not df2.index.is_monotonic diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 97790920d46f7..14ef6237e8ddd 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -711,8 +711,8 @@ def test_multiindex_xs(self): index = 
MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) mgr.set_axis(1, index) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index cba3f000b59c1..69fdb7329a165 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -327,11 +327,11 @@ def test_to_csv_multi_index(self): @pytest.mark.parametrize("ind,expected", [ (pd.MultiIndex(levels=[[1.0]], - labels=[[0]], + codes=[[0]], names=["x"]), "x,data\n1.0,1\n"), (pd.MultiIndex(levels=[[1.], [2.]], - labels=[[0], [0]], + codes=[[0], [0]], names=["x", "y"]), "x,y,data\n1.0,2.0,1\n") ]) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index ce9aca3a87c51..627689b865148 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -403,10 +403,10 @@ def test_to_html_no_index_max_rows(self, datapath): def test_to_html_multiindex_max_cols(self, datapath): # GH 6131 index = MultiIndex(levels=[['ba', 'bb', 'bc'], ['ca', 'cb', 'cc']], - labels=[[0, 1, 2], [0, 1, 2]], + codes=[[0, 1, 2], [0, 1, 2]], names=['b', 'c']) columns = MultiIndex(levels=[['d'], ['aa', 'ab', 'ac']], - labels=[[0, 0, 0], [0, 1, 2]], + codes=[[0, 0, 0], [0, 1, 2]], names=[None, 'a']) data = np.array( [[1., np.nan, np.nan], [np.nan, 2., np.nan], [np.nan, np.nan, 3.]]) diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 47b13ae6c50b1..38f4cc42357fa 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -236,7 +236,7 @@ def test_header_multi_index_common_format_malformed1(all_parsers): columns=MultiIndex(levels=[[u("a"), u("b"), u("c")], [u("r"), u("s"), u("t"), u("u"), u("v")]], - labels=[[0, 0, 1, 2, 2], 
[0, 1, 2, 3, 4]], + codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]], names=[u("a"), u("q")])) data = """a,a,a,b,c,c q,r,s,t,u,v @@ -255,7 +255,7 @@ def test_header_multi_index_common_format_malformed2(all_parsers): columns=MultiIndex(levels=[[u("a"), u("b"), u("c")], [u("r"), u("s"), u("t"), u("u"), u("v")]], - labels=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]], + codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]], names=[None, u("q")])) data = """,a,a,b,c,c @@ -272,10 +272,10 @@ def test_header_multi_index_common_format_malformed3(all_parsers): expected = DataFrame(np.array( [[3, 4, 5, 6], [9, 10, 11, 12]], dtype="int64"), index=MultiIndex(levels=[[1, 7], [2, 8]], - labels=[[0, 1], [0, 1]]), + codes=[[0, 1], [0, 1]]), columns=MultiIndex(levels=[[u("a"), u("b"), u("c")], [u("s"), u("t"), u("u"), u("v")]], - labels=[[0, 1, 2, 2], [0, 1, 2, 3]], + codes=[[0, 1, 2, 2], [0, 1, 2, 3]], names=[None, u("q")])) data = """,a,a,b,c,c q,r,s,t,u,v diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 8c2de40b46114..6421afba18f94 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -148,5 +148,5 @@ def test_multi_index_naming_not_all_at_beginning(all_parsers): expected = DataFrame({"Unnamed: 2": ["c", "d", "c", "d"]}, index=MultiIndex( levels=[['a', 'b'], [1, 2, 3, 4]], - labels=[[0, 0, 1, 1], [0, 1, 2, 3]])) + codes=[[0, 0, 1, 1], [0, 1, 2, 3]])) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 8cc3dee6648a8..033d600ffc09b 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -260,7 +260,7 @@ def test_index_col_empty(self, ext): index_col=["A", "B", "C"]) expected = DataFrame(columns=["D", "E", "F"], index=MultiIndex(levels=[[]] * 3, - labels=[[]] * 3, + codes=[[]] * 3, names=["A", "B", "C"])) tm.assert_frame_equal(result, expected) @@ -1014,7 +1014,7 @@ def test_excel_old_index_format(self, ext): 
"R_l0_g2", "R_l0_g3", "R_l0_g4"], ["R1", "R_l1_g0", "R_l1_g1", "R_l1_g2", "R_l1_g3", "R_l1_g4"]], - labels=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], + codes=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], names=[None, None]) si = Index(["R0", "R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], name=None) @@ -1041,7 +1041,7 @@ def test_excel_old_index_format(self, ext): "R_l0_g3", "R_l0_g4"], ["R_l1_g0", "R_l1_g1", "R_l1_g2", "R_l1_g3", "R_l1_g4"]], - labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], + codes=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], names=[None, None]) si = Index(["R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], name=None) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 19ecb378b6378..44d642399ced9 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -100,15 +100,19 @@ def test_rw_nthreads(self): "the 'nthreads' keyword is deprecated, " "use 'use_threads' instead" ) - with tm.assert_produces_warning(FutureWarning) as w: + # TODO: make the warning work with check_stacklevel=True + with tm.assert_produces_warning( + FutureWarning, check_stacklevel=False) as w: self.check_round_trip(df, nthreads=2) - assert len(w) == 1 - assert expected_warning in str(w[0]) + # we have an extra FutureWarning because of #GH23752 + assert any(expected_warning in str(x) for x in w) - with tm.assert_produces_warning(FutureWarning) as w: + # TODO: make the warning work with check_stacklevel=True + with tm.assert_produces_warning( + FutureWarning, check_stacklevel=False) as w: self.check_round_trip(df, nthreads=1) - assert len(w) == 1 - assert expected_warning in str(w[0]) + # we have an extra FutureWarnings because of #GH23752 + assert any(expected_warning in str(x) for x in w) def test_rw_use_threads(self): df = pd.DataFrame({'A': np.arange(100000)}) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 4201f751959b5..492089644fb15 100644 --- a/pandas/tests/io/test_html.py +++ 
b/pandas/tests/io/test_html.py @@ -798,7 +798,7 @@ def test_header_inferred_from_rows_with_only_th(self): """)[0] columns = MultiIndex(levels=[['A', 'B'], ['a', 'b']], - labels=[[0, 1], [0, 1]]) + codes=[[0, 1], [0, 1]]) expected = DataFrame(data=[[1, 2]], columns=columns) tm.assert_frame_equal(result, expected) @@ -995,7 +995,7 @@ def test_ignore_empty_rows_when_inferring_header(self): """)[0] columns = MultiIndex(levels=[['A', 'B'], ['a', 'b']], - labels=[[0, 1], [0, 1]]) + codes=[[0, 1], [0, 1]]) expected = DataFrame(data=[[1, 2]], columns=columns) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 84a0e3d867783..17f27e60ec28f 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1774,8 +1774,8 @@ def test_append_diff_item_order(self): def test_append_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo', 'bar']) df = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) @@ -1908,8 +1908,8 @@ def test_select_columns_in_where(self): # in the `where` argument index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo_name', 'bar_name']) # With a DataFrame @@ -2877,8 +2877,8 @@ def test_can_serialize_dates(self): def test_store_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo', 'bar']) frame = DataFrame(np.random.randn(10, 3), 
index=index, columns=['A', 'B', 'C']) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 99386e594ff3a..083ce16ef9296 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -401,8 +401,8 @@ def test_join_inner_multiindex(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) to_join = DataFrame(np.random.randn(10, 3), index=index, columns=['j_one', 'j_two', 'j_three']) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index a1158201844b0..aa32948468907 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -32,8 +32,8 @@ def right(): """right dataframe (multi-indexed) for multi-index join tests""" index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['key1', 'key2']) return DataFrame(np.random.randn(10, 3), index=index, @@ -83,8 +83,8 @@ class TestMergeMulti(object): def setup_method(self): self.index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) self.to_join = DataFrame(np.random.randn(10, 3), index=self.index, columns=['j_one', 'j_two', 'j_three']) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 4113fb7f0f11e..488d800af2a39 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1188,8 
+1188,8 @@ def test_concat_ignore_index(self, sort): def test_concat_multiindex_with_keys(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) frame = DataFrame(np.random.randn(10, 3), index=index, columns=Index(['A', 'B', 'C'], name='exp')) @@ -1258,8 +1258,8 @@ def test_concat_keys_and_levels(self): names=names) expected = concat([df, df2, df, df2]) exp_index = MultiIndex(levels=levels + [[0]], - labels=[[0, 0, 1, 1], [0, 1, 0, 1], - [0, 0, 0, 0]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1], + [0, 0, 0, 0]], names=names + [None]) expected.index = exp_index @@ -1591,10 +1591,10 @@ def test_concat_series(self): ts.index = DatetimeIndex(np.array(ts.index.values, dtype='M8[ns]')) - exp_labels = [np.repeat([0, 1, 2], [len(x) for x in pieces]), - np.arange(len(ts))] + exp_codes = [np.repeat([0, 1, 2], [len(x) for x in pieces]), + np.arange(len(ts))] exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], - labels=exp_labels) + codes=exp_codes) expected.index = exp_index tm.assert_series_equal(result, expected) @@ -2141,8 +2141,8 @@ def test_concat_multiindex_rangeindex(self): df = DataFrame(np.random.randn(9, 2)) df.index = MultiIndex(levels=[pd.RangeIndex(3), pd.RangeIndex(3)], - labels=[np.repeat(np.arange(3), 3), - np.tile(np.arange(3), 3)]) + codes=[np.repeat(np.arange(3), 3), + np.tile(np.arange(3), 3)]) res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]]) exp = df.iloc[[2, 3, 4, 5], :] @@ -2161,7 +2161,7 @@ def test_concat_multiindex_dfs_with_deepcopy(self): expected_index = pd.MultiIndex(levels=[['s1', 's2'], ['a'], ['b', 'c']], - labels=[[0, 1], [0, 0], [0, 1]], + codes=[[0, 1], [0, 0], [0, 1]], names=['testname', None, None]) expected = pd.DataFrame([[0], [1]], index=expected_index) result_copy = pd.concat(deepcopy(example_dict), names=['testname']) diff 
--git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index b3dd94b49e3a3..e32e1999836ec 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -451,7 +451,7 @@ def test_pivot_with_list_like_values(self, values, method): [4, 5, 6, 'q', 'w', 't']] index = Index(data=['one', 'two'], name='foo') columns = MultiIndex(levels=[['baz', 'zoo'], ['A', 'B', 'C']], - labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], names=[None, 'bar']) expected = DataFrame(data=data, index=index, columns=columns, dtype='object') @@ -482,7 +482,7 @@ def test_pivot_with_list_like_values_nans(self, values, method): ['C', np.nan, 3, np.nan]] index = Index(data=['q', 't', 'w', 'x', 'y', 'z'], name='zoo') columns = MultiIndex(levels=[['bar', 'baz'], ['one', 'two']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[None, 'foo']) expected = DataFrame(data=data, index=index, columns=columns, dtype='object') @@ -501,7 +501,7 @@ def test_pivot_with_multiindex(self, method): ['two', 'B', 5, 'w'], ['two', 'C', 6, 't']] columns = MultiIndex(levels=[['bar', 'baz'], ['first', 'second']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) + codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) df = DataFrame(data=data, index=index, columns=columns, dtype='object') if method: result = df.pivot(index=('bar', 'first'), @@ -1238,7 +1238,7 @@ def test_pivot_string_as_func(self): result = pivot_table(data, index='A', columns='B', aggfunc='sum') mi = MultiIndex(levels=[['C'], ['one', 'two']], - labels=[[0, 0], [0, 1]], names=[None, 'B']) + codes=[[0, 0], [0, 1]], names=[None, 'B']) expected = DataFrame({('C', 'one'): {'bar': 15, 'foo': 13}, ('C', 'two'): {'bar': 7, 'foo': 20}}, columns=mi).rename_axis('A') @@ -1247,7 +1247,7 @@ def test_pivot_string_as_func(self): result = pivot_table(data, index='A', columns='B', aggfunc=['sum', 'mean']) mi = MultiIndex(levels=[['sum', 'mean'], ['C'], ['one', 
'two']], - labels=[[0, 0, 1, 1], [0, 0, 0, 0], [0, 1, 0, 1]], + codes=[[0, 0, 1, 1], [0, 0, 0, 0], [0, 1, 0, 1]], names=[None, None, 'B']) expected = DataFrame({('mean', 'C', 'one'): {'bar': 5.0, 'foo': 3.25}, ('mean', 'C', 'two'): {'bar': 7.0, @@ -1724,8 +1724,8 @@ def test_crosstab_with_numpy_size(self): values=df['D']) expected_index = pd.MultiIndex(levels=[['All', 'one', 'three', 'two'], ['', 'A', 'B', 'C']], - labels=[[1, 1, 1, 2, 2, 2, 3, 3, 3, 0], - [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]], + codes=[[1, 1, 1, 2, 2, 2, 3, 3, 3, 0], + [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]], names=['A', 'B']) expected_column = pd.Index(['bar', 'foo', 'All'], dtype='object', diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index d8b3d9588f2f1..0d26e9c375d0d 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -613,7 +613,7 @@ def test_preserve_categorical_dtype(self): for ordered in [False, True]: cidx = pd.CategoricalIndex(list("xyz"), ordered=ordered) midx = pd.MultiIndex(levels=[['a'], cidx], - labels=[[0, 0], [0, 1]]) + codes=[[0, 0], [0, 1]]) df = DataFrame([[10, 11]], index=midx) expected = DataFrame([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index f969619d5acb0..92c41f65eb831 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -711,8 +711,8 @@ def test_type_promote_putmask(): def test_multilevel_preserve_name(): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) s = Series(np.random.randn(len(index)), index=index, name='sth') diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 
79de3dc3be19f..99a4f0c424ce9 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -133,8 +133,8 @@ def test_reset_index(self): # level index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], - labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], - [0, 1, 0, 1, 0, 1]]) + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], + [0, 1, 0, 1, 0, 1]]) s = Series(np.random.randn(6), index=index) rs = s.reset_index(level=1) assert len(rs.columns) == 2 @@ -204,8 +204,8 @@ def test_reset_index_range(self): def test_reorder_levels(self): index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], - labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], - [0, 1, 0, 1, 0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], + [0, 1, 0, 1, 0, 1]], names=['L0', 'L1', 'L2']) s = Series(np.arange(6), index=index) @@ -220,8 +220,8 @@ def test_reorder_levels(self): # rotate, position result = s.reorder_levels([1, 2, 0]) e_idx = MultiIndex(levels=[['one', 'two', 'three'], [0, 1], ['bar']], - labels=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], - [0, 0, 0, 0, 0, 0]], + codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], + [0, 0, 0, 0, 0, 0]], names=['L1', 'L2', 'L0']) expected = Series(np.arange(6), index=e_idx) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 6e40324c67b59..a9c8e855cd324 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -296,8 +296,8 @@ def test_kurt(self, string_series): self._check_stat_op('kurt', alt, string_series) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], - labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], - [0, 1, 0, 1, 0, 1]]) + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], + [0, 1, 0, 1, 0, 1]]) s = Series(np.random.randn(6), index=index) tm.assert_almost_equal(s.kurt(), s.kurt(level=0)['bar']) @@ -1481,7 +1481,7 @@ def test_unstack(self): from numpy import 
nan index = MultiIndex(levels=[['bar', 'foo'], ['one', 'three', 'two']], - labels=[[1, 1, 0, 0], [0, 1, 0, 2]]) + codes=[[1, 1, 0, 0], [0, 1, 0, 2]]) s = Series(np.arange(4.), index=index) unstacked = s.unstack() @@ -1496,11 +1496,11 @@ def test_unstack(self): assert_frame_equal(unstacked, expected.T) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], - labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], - [0, 1, 0, 1, 0, 1]]) + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], + [0, 1, 0, 1, 0, 1]]) s = Series(np.random.randn(6), index=index) exp_index = MultiIndex(levels=[['one', 'two', 'three'], [0, 1]], - labels=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) + codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) expected = DataFrame({'bar': s.values}, index=exp_index).sort_index(level=0) unstacked = s.unstack(0).sort_index() diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index c4a0496f7fb27..86de8176a9a65 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -25,8 +25,8 @@ class TestSeriesRepr(TestData): def test_multilevel_name_print(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) s = Series(lrange(0, len(index)), index=index, name='sth') expected = ["first second", "foo one 0", diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 969c20601c7c8..b9cf845ea47d7 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1018,8 +1018,8 @@ def test_get_level_values_box(self): dates = date_range('1/1/2000', periods=4) levels = [dates, [0, 1]] - labels = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]] + codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]] - index = 
MultiIndex(levels=levels, labels=labels) + index = MultiIndex(levels=levels, codes=codes) assert isinstance(index.get_level_values(0)[0], Timestamp) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index cc4ee7ca72343..6c1a2490ea76e 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -28,14 +28,14 @@ def setup_method(self, method): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) self.frame = DataFrame(np.random.randn(10, 3), index=index, columns=Index(['A', 'B', 'C'], name='exp')) self.single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], names=['first']) + codes=[[0, 1, 2, 3]], names=['first']) # create test series object arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], @@ -292,7 +292,7 @@ def _check_counts(frame, axis=0): def test_count_level_series(self): index = MultiIndex(levels=[['foo', 'bar', 'baz'], ['one', 'two', 'three', 'four']], - labels=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]]) + codes=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]]) s = Series(np.random.randn(len(index)), index=index) @@ -410,7 +410,7 @@ def check(left, right): columns=['1st', '2nd', '3rd']) mi = MultiIndex(levels=[['a', 'b'], ['1st', '2nd', '3rd']], - labels=[np.tile( + codes=[np.tile( np.arange(2).repeat(3), 2), np.tile( np.arange(3), 4)]) @@ -418,7 +418,7 @@ def check(left, right): check(left, right) df.columns = ['1st', '2nd', '1st'] - mi = MultiIndex(levels=[['a', 'b'], ['1st', '2nd']], labels=[np.tile( + mi = MultiIndex(levels=[['a', 'b'], ['1st', '2nd']], codes=[np.tile( np.arange(2).repeat(3), 2), np.tile( [0, 1, 0], 4)]) @@ -428,7 +428,7 @@ def check(left, right): tpls = ('a', 2), ('b', 1), ('a', 1), ('b', 2) df.index = MultiIndex.from_tuples(tpls) mi = 
MultiIndex(levels=[['a', 'b'], [1, 2], ['1st', '2nd']], - labels=[np.tile( + codes=[np.tile( np.arange(2).repeat(3), 2), np.repeat( [1, 0, 1], [3, 6, 3]), np.tile( [0, 1, 0], 4)]) @@ -708,9 +708,9 @@ def test_unstack_sparse_keyspace(self): def test_unstack_unobserved_keys(self): # related to #2278 refactoring levels = [[0, 1], [0, 1, 2, 3]] - labels = [[0, 0, 1, 1], [0, 2, 0, 2]] + codes = [[0, 0, 1, 1], [0, 2, 0, 2]] - index = MultiIndex(levels, labels) + index = MultiIndex(levels, codes) df = DataFrame(np.random.randn(4, 2), index=index) @@ -736,8 +736,8 @@ def manual_compare_stacked(df, df_stacked, lev0, lev1): for levels in levels_poss: columns = MultiIndex(levels=levels, - labels=[[0, 0, 1, 1], - [0, 1, 0, 1]]) + codes=[[0, 0, 1, 1], + [0, 1, 0, 1]]) df = DataFrame(columns=columns, data=[range(4)]) for stack_lev in range(2): df_stacked = df.stack(stack_lev) @@ -746,14 +746,14 @@ def manual_compare_stacked(df, df_stacked, lev0, lev1): # check multi-row case mi = MultiIndex(levels=[["A", "C", "B"], ["B", "A", "C"]], - labels=[np.repeat(range(3), 3), np.tile(range(3), 3)]) + codes=[np.repeat(range(3), 3), np.tile(range(3), 3)]) df = DataFrame(columns=mi, index=range(5), data=np.arange(5 * len(mi)).reshape(5, -1)) manual_compare_stacked(df, df.stack(0), 0, 1) def test_groupby_corner(self): midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']], - labels=[[0], [0], [0]], + codes=[[0], [0], [0]], names=['one', 'two', 'three']) df = DataFrame([np.random.rand(4)], columns=['a', 'b', 'c', 'd'], index=midx) @@ -1040,11 +1040,11 @@ def test_unstack_preserve_types(self): assert unstacked['F', 1].dtype == np.float64 def test_unstack_group_index_overflow(self): - labels = np.tile(np.arange(500), 2) + codes = np.tile(np.arange(500), 2) level = np.arange(500) index = MultiIndex(levels=[level] * 8 + [[0, 1]], - labels=[labels] * 8 + [np.arange(2).repeat(500)]) + codes=[codes] * 8 + [np.arange(2).repeat(500)]) s = Series(np.arange(1000), index=index) result = s.unstack() @@ 
-1056,7 +1056,7 @@ def test_unstack_group_index_overflow(self): # put it at beginning index = MultiIndex(levels=[[0, 1]] + [level] * 8, - labels=[np.arange(2).repeat(500)] + [labels] * 8) + codes=[np.arange(2).repeat(500)] + [codes] * 8) s = Series(np.arange(1000), index=index) result = s.unstack(0) @@ -1064,8 +1064,8 @@ def test_unstack_group_index_overflow(self): # put it in middle index = MultiIndex(levels=[level] * 4 + [[0, 1]] + [level] * 4, - labels=([labels] * 4 + [np.arange(2).repeat(500)] + - [labels] * 4)) + codes=([codes] * 4 + [np.arange(2).repeat(500)] + + [codes] * 4)) s = Series(np.arange(1000), index=index) result = s.unstack(4) @@ -1111,7 +1111,7 @@ def test_to_html(self): def test_level_with_tuples(self): index = MultiIndex(levels=[[('foo', 'bar', 0), ('foo', 'baz', 0), ( 'foo', 'qux', 0)], [0, 1]], - labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) series = Series(np.random.randn(6), index=index) frame = DataFrame(np.random.randn(6, 4), index=index) @@ -1134,7 +1134,7 @@ def test_level_with_tuples(self): index = MultiIndex(levels=[[('foo', 'bar'), ('foo', 'baz'), ( 'foo', 'qux')], [0, 1]], - labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) series = Series(np.random.randn(6), index=index) frame = DataFrame(np.random.randn(6, 4), index=index) @@ -1306,8 +1306,8 @@ def test_drop_preserve_names(self): def test_unicode_repr_issues(self): levels = [Index([u('a/\u03c3'), u('b/\u03c3'), u('c/\u03c3')]), Index([0, 1])] - labels = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] - index = MultiIndex(levels=levels, labels=labels) + codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] + index = MultiIndex(levels=levels, codes=codes) repr(index.levels) @@ -1379,8 +1379,8 @@ def test_assign_index_sequences(self): def test_tuples_have_na(self): index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], - labels=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, - 1, 2, 
3]]) + codes=[[1, 1, 1, 1, -1, 0, 0, 0], + [0, 1, 2, 3, 0, 1, 2, 3]]) assert isna(index[4][0]) assert isna(index.values[4][0]) @@ -1827,15 +1827,15 @@ def test_is_lexsorted(self): levels = [[0, 1], [0, 1, 2]] index = MultiIndex(levels=levels, - labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) assert index.is_lexsorted() index = MultiIndex(levels=levels, - labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) assert not index.is_lexsorted() index = MultiIndex(levels=levels, - labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) + codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) assert not index.is_lexsorted() assert index.lexsort_depth == 0 @@ -1865,7 +1865,7 @@ def test_sort_index_and_reconstruction(self): result = DataFrame( [[1, 1], [2, 2], [1, 1], [2, 2]], index=MultiIndex(levels=[[0.5, 0.8], ['a', 'b']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]])) + codes=[[0, 0, 1, 1], [0, 1, 0, 1]])) result = result.sort_index() assert result.index.is_lexsorted() @@ -1903,7 +1903,7 @@ def test_sort_index_and_reconstruction_doc_example(self): df = DataFrame({'value': [1, 2, 3, 4]}, index=MultiIndex( levels=[['a', 'b'], ['bb', 'aa']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]])) + codes=[[0, 0, 1, 1], [0, 1, 0, 1]])) assert df.index.is_lexsorted() assert not df.index.is_monotonic @@ -1911,7 +1911,7 @@ def test_sort_index_and_reconstruction_doc_example(self): expected = DataFrame({'value': [2, 1, 4, 3]}, index=MultiIndex( levels=[['a', 'b'], ['aa', 'bb']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]])) + codes=[[0, 0, 1, 1], [0, 1, 0, 1]])) result = df.sort_index() assert result.index.is_lexsorted() assert result.index.is_monotonic diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index c0c4e627b1b2e..33f2c34400373 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1760,7 +1760,7 @@ def test_to_frame_multi_major(self): def test_to_frame_multi_major_minor(self): 
cols = MultiIndex(levels=[['C_A', 'C_B'], ['C_1', 'C_2']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) + codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( 2, 'two'), (3, 'three'), (4, 'four')]) df = DataFrame([[1, 2, 11, 12], [3, 4, 13, 14], @@ -2486,10 +2486,10 @@ def is_sorted(arr): return (arr[1:] > arr[:-1]).any() sorted_minor = self.panel.sort_index(level=1) - assert is_sorted(sorted_minor.index.labels[1]) + assert is_sorted(sorted_minor.index.codes[1]) sorted_major = sorted_minor.sort_index(level=0) - assert is_sorted(sorted_major.index.labels[0]) + assert is_sorted(sorted_major.index.codes[0]) def test_to_string(self): buf = StringIO() @@ -2561,7 +2561,7 @@ def test_axis_dummies(self): def test_get_dummies(self): from pandas.core.reshape.reshape import get_dummies, make_axis_dummies - self.panel['Label'] = self.panel.index.labels[1] + self.panel['Label'] = self.panel.index.codes[1] minor_dummies = make_axis_dummies(self.panel, 'minor').astype(np.uint8) dummies = get_dummies(self.panel['Label']) tm.assert_numpy_array_equal(dummies.values, minor_dummies.values) @@ -2584,14 +2584,14 @@ def test_count(self): index = self.panel.index major_count = self.panel.count(level=0)['ItemA'] - labels = index.labels[0] + level_codes = index.codes[0] for i, idx in enumerate(index.levels[0]): - assert major_count[i] == (labels == i).sum() + assert major_count[i] == (level_codes == i).sum() minor_count = self.panel.count(level=1)['ItemA'] - labels = index.labels[1] + level_codes = index.codes[1] for i, idx in enumerate(index.levels[1]): - assert minor_count[i] == (labels == i).sum() + assert minor_count[i] == (level_codes == i).sum() def test_join(self): lp1 = self.panel.filter(['ItemA', 'ItemB']) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index 84bc1863aadd9..d36de931e2610 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -150,7 +150,7 @@ def 
test_multiindex_unique(): def test_multiindex_objects(): mi = MultiIndex(levels=[["b", "d", "a"], [1, 2, 3]], - labels=[[0, 1, 0, 2], [2, 0, 0, 1]], + codes=[[0, 1, 0, 2], [2, 0, 0, 1]], names=["col1", "col2"]) recons = mi._sort_levels_monotonic() diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 97c64d013d241..7a1828149cd87 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -838,7 +838,7 @@ def _check_types(l, r, obj='Index'): def _get_ilevel_values(index, level): # accept level number only unique = index.levels[level] - labels = index.labels[level] + labels = index.codes[level] filled = take_1d(unique.values, labels, fill_value=unique._na_value) values = unique._shallow_copy(filled, name=index.names[level]) return values