From edd31662ba2132705dc265b0f6e99f0d20e38ee6 Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 20 Sep 2018 21:00:35 +0100 Subject: [PATCH 1/3] rename MultiIndex.labels -> codes --- pandas/core/indexes/multi.py | 82 ++++++++++--------- .../tests/indexes/multi/test_constructor.py | 2 +- 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 01304cce507f0..01eb5cfacef38 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -137,7 +137,12 @@ class MultiIndex(Index): ---------- levels : sequence of arrays The unique labels for each level + codes : sequence of arrays + Integers for each level designating which label at each location labels : sequence of arrays + .. deprecated:: 0.24.0 + Use ``codes`` instead + Integers for each level designating which label at each location sortorder : optional int Level of sortedness (must be lexicographically sorted by that @@ -181,6 +186,7 @@ class MultiIndex(Index): ---------- names levels + codes labels nlevels levshape @@ -205,7 +211,7 @@ class MultiIndex(Index): _typ = 'multiindex' _names = FrozenList() _levels = FrozenList() - _labels = FrozenList() + _codes = FrozenList() _comparables = ['names'] rename = Index.set_names @@ -227,7 +233,7 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, # we've already validated levels and labels, so shortcut here result._set_levels(levels, copy=copy, validate=False) - result._set_labels(labels, copy=copy, validate=False) + result._set_codes(labels, copy=copy, validate=False) if names is not None: # handles name validation @@ -244,39 +250,39 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, result._reset_identity() return result - def _verify_integrity(self, labels=None, levels=None): + def _verify_integrity(self, codes=None, levels=None): """ Parameters ---------- - labels : optional list - Labels to check for validity. Defaults to current labels. 
+ codes : optional list + Codes to check for validity. Defaults to current codes. levels : optional list Levels to check for validity. Defaults to current levels. Raises ------ ValueError - If length of levels and labels don't match, if any label would + If length of levels and codes don't match, if any code would exceed level bounds, or there are any duplicate levels. """ # NOTE: Currently does not check, among other things, that cached # nlevels matches nor that sortorder matches actually sortorder. - labels = labels or self.labels + codes = codes or self.labels levels = levels or self.levels - if len(levels) != len(labels): - raise ValueError("Length of levels and labels must match. NOTE:" + if len(levels) != len(codes): + raise ValueError("Length of levels and codes must match. NOTE:" " this index is in an inconsistent state.") - label_length = len(self.labels[0]) - for i, (level, label) in enumerate(zip(levels, labels)): - if len(label) != label_length: - raise ValueError("Unequal label lengths: %s" % - ([len(lab) for lab in labels])) - if len(label) and label.max() >= len(level): - raise ValueError("On level %d, label max (%d) >= length of" + codes_length = len(self.labels[0]) + for i, (level, level_codes) in enumerate(zip(levels, codes)): + if len(level_codes) != codes_length: + raise ValueError("Unequal code lengths: %s" % + ([len(code_) for code_ in codes])) + if len(level_codes) and level_codes.max() >= len(level): + raise ValueError("On level %d, code max (%d) >= length of" " level (%d). 
NOTE: this index is in an" - " inconsistent state" % (i, label.max(), + " inconsistent state" % (i, level_codes.max(), len(level))) if not level.is_unique: raise ValueError("Level values must be unique: {values} on " @@ -414,33 +420,33 @@ def set_levels(self, levels, level=None, inplace=False, @property def labels(self): - return self._labels + return self._codes - def _set_labels(self, labels, level=None, copy=False, validate=True, - verify_integrity=False): + def _set_codes(self, codes, level=None, copy=False, validate=True, + verify_integrity=False): - if validate and level is None and len(labels) != self.nlevels: - raise ValueError("Length of labels must match number of levels") - if validate and level is not None and len(labels) != len(level): - raise ValueError('Length of labels must match length of levels.') + if validate and level is None and len(codes) != self.nlevels: + raise ValueError("Length of codes must match number of levels") + if validate and level is not None and len(codes) != len(level): + raise ValueError('Length of codes must match length of levels.') if level is None: - new_labels = FrozenList( - _ensure_frozen(lab, lev, copy=copy)._shallow_copy() - for lev, lab in zip(self.levels, labels)) + new_codes = FrozenList( + _ensure_frozen(level_codes, lev, copy=copy)._shallow_copy() + for lev, level_codes in zip(self.levels, codes)) else: level = [self._get_level_number(l) for l in level] - new_labels = list(self._labels) - for lev_idx, lab in zip(level, labels): + new_codes = list(self._codes) + for lev_idx, level_codes in zip(level, codes): lev = self.levels[lev_idx] - new_labels[lev_idx] = _ensure_frozen( - lab, lev, copy=copy)._shallow_copy() - new_labels = FrozenList(new_labels) + new_codes[lev_idx] = _ensure_frozen( + level_codes, lev, copy=copy)._shallow_copy() + new_codes = FrozenList(new_codes) if verify_integrity: - self._verify_integrity(labels=new_labels) + self._verify_integrity(codes=new_codes) - self._labels = new_labels + 
self._codes = new_codes self._tuples = None self._reset_cache() @@ -503,7 +509,7 @@ def set_labels(self, labels, level=None, inplace=False, else: idx = self._shallow_copy() idx._reset_identity() - idx._set_labels(labels, level=level, verify_integrity=verify_integrity) + idx._set_codes(labels, level=level, verify_integrity=verify_integrity) if not inplace: return idx @@ -639,7 +645,7 @@ def _format_attrs(self): attrs = [ ('levels', ibase.default_pprint(self._levels, max_seq_items=False)), - ('labels', ibase.default_pprint(self._labels, + ('labels', ibase.default_pprint(self._codes, max_seq_items=False))] if com._any_not_none(*self.names): attrs.append(('names', ibase.default_pprint(self.names))) @@ -1558,7 +1564,7 @@ def remove_unused_levels(self): if changed: result._reset_identity() result._set_levels(new_levels, validate=False) - result._set_labels(new_labels, validate=False) + result._set_codes(new_labels, validate=False) return result @@ -1594,7 +1600,7 @@ def __setstate__(self, state): levels, labels, sortorder, names = own_state self._set_levels([Index(x) for x in levels], validate=False) - self._set_labels(labels) + self._set_codes(labels) self._set_names(names) self.sortorder = sortorder self._verify_integrity() diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index fb15d674613d4..a1f7460af73e2 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -67,7 +67,7 @@ def test_constructor_mismatched_label_levels(idx): MultiIndex(levels=levels, labels=labels) length_error = re.compile('>= length of level') - label_error = re.compile(r'Unequal label lengths: \[4, 2\]') + label_error = re.compile(r'Unequal code lengths: \[4, 2\]') # important to check that it's looking at the right thing. 
with pytest.raises(ValueError, match=length_error): From 643ae52881dd0ba1ec7727e41752d411c5b70bbc Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 27 Oct 2018 12:09:32 +0100 Subject: [PATCH 2/3] MultiIndex.set_labels -> set_codes --- pandas/core/indexes/multi.py | 36 +++-- pandas/tests/indexes/multi/test_compat.py | 4 +- .../tests/indexes/multi/test_constructor.py | 2 +- pandas/tests/indexes/multi/test_get_set.py | 137 ++++++++++++------ pandas/tests/test_multilevel.py | 2 +- 5 files changed, 118 insertions(+), 63 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 01eb5cfacef38..557f4266d1edc 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -42,6 +42,7 @@ _index_shared_docs) from pandas.core.indexes.frozen import FrozenList, _ensure_frozen import pandas.core.indexes.base as ibase +from pandas.util._decorators import deprecate_kwarg _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( dict(klass='MultiIndex', @@ -263,8 +264,8 @@ def _verify_integrity(self, codes=None, levels=None): Raises ------ ValueError - If length of levels and codes don't match, if any code would - exceed level bounds, or there are any duplicate levels. + If length of levels and codes don't match, if the codes for any + level would exceed level bounds, or there are any duplicate levels. """ # NOTE: Currently does not check, among other things, that cached # nlevels matches nor that sortorder matches actually sortorder. @@ -452,14 +453,23 @@ def _set_codes(self, codes, level=None, copy=False, validate=True, def set_labels(self, labels, level=None, inplace=False, verify_integrity=True): + warnings.warn(("set_labels was deprecated in version 0.24.0." 
+ "Use set_codes instead."), + FutureWarning, stacklevel=2) + return self.set_codes(labels, level=level, inplace=inplace, + verify_integrity=verify_integrity) + + @deprecate_kwarg(old_arg_name='labels', new_arg_name='codes') + def set_codes(self, codes, level=None, inplace=False, + verify_integrity=True): """ - Set new labels on MultiIndex. Defaults to returning + Set new codes on MultiIndex. Defaults to returning new index. Parameters ---------- - labels : sequence or list of sequence - new labels to apply + codes : sequence or list of sequence + new codes to apply level : int, level name, or sequence of int/level names (default None) level(s) to set (None for all levels) inplace : bool @@ -494,22 +504,22 @@ def set_labels(self, labels, level=None, inplace=False, names=[u'foo', u'bar']) """ if level is not None and not is_list_like(level): - if not is_list_like(labels): - raise TypeError("Labels must be list-like") - if is_list_like(labels[0]): - raise TypeError("Labels must be list-like") + if not is_list_like(codes): + raise TypeError("Codes must be list-like") + if is_list_like(codes[0]): + raise TypeError("Codes must be list-like") level = [level] - labels = [labels] + codes = [codes] elif level is None or is_list_like(level): - if not is_list_like(labels) or not is_list_like(labels[0]): - raise TypeError("Labels must be list of lists-like") + if not is_list_like(codes) or not is_list_like(codes[0]): + raise TypeError("Codes must be list of lists-like") if inplace: idx = self else: idx = self._shallow_copy() idx._reset_identity() - idx._set_codes(labels, level=level, verify_integrity=verify_integrity) + idx._set_codes(codes, level=level, verify_integrity=verify_integrity) if not inplace: return idx diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index 23ea0c306d47c..0353e39935cb6 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -92,7 +92,7 @@ def 
test_inplace_mutation_resets_values(): # Must be 1d array of tuples assert exp_values.shape == (6,) - new_values = mi2.set_labels(labels2).values + new_values = mi2.set_codes(labels2).values # Not inplace shouldn't change tm.assert_almost_equal(mi2._tuples, vals2) @@ -101,7 +101,7 @@ def test_inplace_mutation_resets_values(): tm.assert_almost_equal(exp_values, new_values) # ...and again setting inplace should kill _tuples, etc - mi2.set_labels(labels2, inplace=True) + mi2.set_codes(labels2, inplace=True) tm.assert_almost_equal(mi2.values, new_values) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index a1f7460af73e2..bb9b2877d519c 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -82,7 +82,7 @@ def test_constructor_mismatched_label_levels(idx): idx.copy().set_levels([['a'], ['b']]) with pytest.raises(ValueError, match=label_error): - idx.copy().set_labels([[0, 0, 0, 0], [0, 0]]) + idx.copy().set_codes([[0, 0, 0, 0], [0, 0]]) def test_copy_in_constructor(): diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index a5f586bd98d5f..d534ac717cbee 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -171,7 +171,7 @@ def test_set_levels_labels_directly(idx): def test_set_levels(idx): - # side note - you probably wouldn't want to use levels and labels + # side note - you probably wouldn't want to use levels and codes # directly like this - but it is possible. 
levels = idx.levels new_levels = [[lev + 'a' for lev in level] for level in levels] @@ -231,9 +231,15 @@ def test_set_levels(idx): assert_matching(idx.levels, original_index.levels, check_dtype=True) +<<<<<<< HEAD with pytest.raises(ValueError, match="^On"): idx.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) +======= + with tm.assert_raises_regex(ValueError, "^On"): + idx.set_codes([0, 1, 2, 3, 4, 5], level=0, + inplace=inplace) +>>>>>>> MultiIndex.set_labels -> set_codes assert_matching(idx.labels, original_index.labels, check_dtype=True) @@ -242,92 +248,118 @@ def test_set_levels(idx): assert_matching(idx.levels, original_index.levels, check_dtype=True) +<<<<<<< HEAD with pytest.raises(TypeError, match="^Labels"): idx.set_labels(1, level=0, inplace=inplace) +======= + with tm.assert_raises_regex(TypeError, "^Codes"): + idx.set_codes(1, level=0, inplace=inplace) +>>>>>>> MultiIndex.set_labels -> set_codes assert_matching(idx.labels, original_index.labels, check_dtype=True) -def test_set_labels(idx): - # side note - you probably wouldn't want to use levels and labels +def test_set_codes(idx): + # side note - you probably wouldn't want to use levels and codes # directly like this - but it is possible. 
- labels = idx.labels - major_labels, minor_labels = labels - major_labels = [(x + 1) % 3 for x in major_labels] - minor_labels = [(x + 1) % 1 for x in minor_labels] - new_labels = [major_labels, minor_labels] - - # label changing [w/o mutation] - ind2 = idx.set_labels(new_labels) - assert_matching(ind2.labels, new_labels) - assert_matching(idx.labels, labels) - - # label changing [w/ mutation] + codes = idx.labels + major_codes, minor_codes = codes + major_codes = [(x + 1) % 3 for x in major_codes] + minor_codes = [(x + 1) % 1 for x in minor_codes] + new_codes = [major_codes, minor_codes] + + # changing codes w/o mutation + ind2 = idx.set_codes(new_codes) + assert_matching(ind2.labels, new_codes) + assert_matching(idx.labels, codes) + + # changing label w/ mutation ind2 = idx.copy() - inplace_return = ind2.set_labels(new_labels, inplace=True) + inplace_return = ind2.set_codes(new_codes, inplace=True) assert inplace_return is None - assert_matching(ind2.labels, new_labels) + assert_matching(ind2.labels, new_codes) - # label changing specific level [w/o mutation] - ind2 = idx.set_labels(new_labels[0], level=0) - assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(idx.labels, labels) + # codes changing specific level w/o mutation + ind2 = idx.set_codes(new_codes[0], level=0) + assert_matching(ind2.labels, [new_codes[0], codes[1]]) + assert_matching(idx.labels, codes) - ind2 = idx.set_labels(new_labels[1], level=1) - assert_matching(ind2.labels, [labels[0], new_labels[1]]) - assert_matching(idx.labels, labels) + ind2 = idx.set_codes(new_codes[1], level=1) + assert_matching(ind2.labels, [codes[0], new_codes[1]]) + assert_matching(idx.labels, codes) - # label changing multiple levels [w/o mutation] - ind2 = idx.set_labels(new_labels, level=[0, 1]) - assert_matching(ind2.labels, new_labels) - assert_matching(idx.labels, labels) + # codes changing multiple levels w/o mutation + ind2 = idx.set_codes(new_codes, level=[0, 1]) + 
assert_matching(ind2.labels, new_codes) + assert_matching(idx.labels, codes) - # label changing specific level [w/ mutation] + # label changing specific level w/ mutation ind2 = idx.copy() - inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) + inplace_return = ind2.set_codes(new_codes[0], level=0, inplace=True) assert inplace_return is None - assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(idx.labels, labels) + assert_matching(ind2.labels, [new_codes[0], codes[1]]) + assert_matching(idx.labels, codes) ind2 = idx.copy() - inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) + inplace_return = ind2.set_codes(new_codes[1], level=1, inplace=True) assert inplace_return is None - assert_matching(ind2.labels, [labels[0], new_labels[1]]) - assert_matching(idx.labels, labels) + assert_matching(ind2.labels, [codes[0], new_codes[1]]) + assert_matching(idx.labels, codes) - # label changing multiple levels [w/ mutation] + # codes changing multiple levels [w/ mutation] ind2 = idx.copy() - inplace_return = ind2.set_labels(new_labels, level=[0, 1], - inplace=True) + inplace_return = ind2.set_codes(new_codes, level=[0, 1], + inplace=True) assert inplace_return is None - assert_matching(ind2.labels, new_labels) - assert_matching(idx.labels, labels) + assert_matching(ind2.labels, new_codes) + assert_matching(idx.labels, codes) # label changing for levels of different magnitude of categories + ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) + new_codes = range(129, -1, -1) + expected = pd.MultiIndex.from_tuples( + [(0, i) for i in new_codes]) + + # [w/o mutation] + result = ind.set_codes(codes=new_codes, level=1) + assert result.equals(expected) + + # [w/ mutation] + result = ind.copy() + result.set_codes(codes=new_codes, level=1, inplace=True) + assert result.equals(expected) + + with tm.assert_produces_warning(FutureWarning): + ind.set_codes(labels=new_codes, level=1) + + +def 
test_set_labels_deprecated(): ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) new_labels = range(129, -1, -1) expected = pd.MultiIndex.from_tuples( [(0, i) for i in new_labels]) # [w/o mutation] - result = ind.set_labels(labels=new_labels, level=1) + with tm.assert_produces_warning(FutureWarning): + result = ind.set_labels(labels=new_labels, level=1) assert result.equals(expected) # [w/ mutation] result = ind.copy() - result.set_labels(labels=new_labels, level=1, inplace=True) + with tm.assert_produces_warning(FutureWarning): + result.set_labels(labels=new_labels, level=1, inplace=True) assert result.equals(expected) -def test_set_levels_labels_names_bad_input(idx): - levels, labels = idx.levels, idx.labels +def test_set_levels_codes_names_bad_input(idx): + levels, codes = idx.levels, idx.labels names = idx.names with pytest.raises(ValueError, match='Length of levels'): idx.set_levels([levels[0]]) - with pytest.raises(ValueError, match='Length of labels'): - idx.set_labels([labels[0]]) + with tm.assert_raises_regex(ValueError, 'Length of codes'): + idx.set_codes([codes[0]]) with pytest.raises(ValueError, match='Length of names'): idx.set_names([names[0]]) @@ -337,8 +369,13 @@ def test_set_levels_labels_names_bad_input(idx): idx.set_levels(levels[0]) # shouldn't scalar data error, instead should demand list-like +<<<<<<< HEAD with pytest.raises(TypeError, match='list of lists-like'): idx.set_labels(labels[0]) +======= + with tm.assert_raises_regex(TypeError, 'list of lists-like'): + idx.set_codes(codes[0]) +>>>>>>> MultiIndex.set_labels -> set_codes # shouldn't scalar data error, instead should demand list-like with pytest.raises(TypeError, match='list-like'): @@ -352,11 +389,19 @@ def test_set_levels_labels_names_bad_input(idx): idx.set_levels(levels, level=0) # should have equal lengths +<<<<<<< HEAD with pytest.raises(TypeError, match='list of lists-like'): idx.set_labels(labels[0], level=[0, 1]) with pytest.raises(TypeError, match='list-like'): 
idx.set_labels(labels, level=0) +======= + with tm.assert_raises_regex(TypeError, 'list of lists-like'): + idx.set_codes(codes[0], level=[0, 1]) + + with tm.assert_raises_regex(TypeError, 'list-like'): + idx.set_codes(codes, level=0) +>>>>>>> MultiIndex.set_labels -> set_codes # should have equal lengths with pytest.raises(ValueError, match='Length of names'): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 2717b92e05a29..ec250f477f804 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2596,7 +2596,7 @@ def test_frame_getitem_not_sorted2(self): df2_original = df2.copy() df2.index.set_levels(['b', 'd', 'a'], level='col1', inplace=True) - df2.index.set_labels([0, 1, 0, 2], level='col1', inplace=True) + df2.index.set_codes([0, 1, 0, 2], level='col1', inplace=True) assert not df2.index.is_lexsorted() assert not df2.index.is_monotonic From 38b241b0bef2f16d434cb4696fb8e7a0eaf63e35 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 27 Oct 2018 19:24:28 +0100 Subject: [PATCH 3/3] MultiIndex.labels -> MultiIndex.codes --- pandas/core/frame.py | 13 +-- pandas/core/groupby/generic.py | 2 +- pandas/core/indexes/base.py | 34 +++---- pandas/core/indexes/multi.py | 86 +++++++++--------- pandas/core/panel.py | 18 ++-- pandas/core/reshape/concat.py | 24 ++--- pandas/core/reshape/merge.py | 28 +++--- pandas/core/reshape/reshape.py | 86 +++++++++--------- pandas/core/series.py | 8 +- pandas/core/util/hashing.py | 2 +- pandas/io/formats/excel.py | 14 +-- pandas/io/pytables.py | 8 +- pandas/tests/frame/test_alter_axes.py | 8 +- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/indexes/multi/test_astype.py | 2 +- .../tests/indexes/multi/test_constructor.py | 20 ++--- pandas/tests/indexes/multi/test_copy.py | 8 +- .../tests/indexes/multi/test_equivalence.py | 14 +-- pandas/tests/indexes/multi/test_get_set.py | 88 +++++++------------ pandas/tests/indexes/multi/test_integrity.py | 24 ++--- 
pandas/tests/indexes/multi/test_names.py | 10 +-- pandas/tests/test_multilevel.py | 24 ++--- pandas/tests/test_panel.py | 14 +-- pandas/util/testing.py | 2 +- test_fast.bat | 2 +- 25 files changed, 260 insertions(+), 281 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f8d153327f135..c1b421ed8f373 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1807,7 +1807,7 @@ def to_panel(self): selfsorted = self major_axis, minor_axis = selfsorted.index.levels - major_labels, minor_labels = selfsorted.index.labels + major_codes, minor_codes = selfsorted.index.codes shape = len(major_axis), len(minor_axis) # preserve names, if any @@ -1822,8 +1822,8 @@ def to_panel(self): # create new manager new_mgr = selfsorted._data.reshape_nd(axes=new_axes, - labels=[major_labels, - minor_labels], + labels=[major_codes, + minor_codes], shape=shape, ref_items=selfsorted.columns) @@ -4259,7 +4259,7 @@ def _maybe_casted_values(index, labels=None): if isinstance(self.index, MultiIndex): names = [n if n is not None else ('level_%d' % i) for (i, n) in enumerate(self.index.names)] - to_insert = lzip(self.index.levels, self.index.labels) + to_insert = lzip(self.index.levels, self.index.codes) else: default = 'index' if 'index' not in self else 'level_0' names = ([default] if self.index.name is None @@ -7167,8 +7167,9 @@ def _count_level(self, level, axis=0, numeric_only=False): level = count_axis._get_level_number(level) level_index = count_axis.levels[level] - labels = ensure_int64(count_axis.labels[level]) - counts = lib.count_level_2d(mask, labels, len(level_index), axis=0) + level_codes = ensure_int64(count_axis.codes[level]) + counts = lib.count_level_2d(mask, level_codes, len(level_index), + axis=0) result = DataFrame(counts, index=level_index, columns=agg_axis) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 451f1199ac8e6..36fbf762967c6 100644 --- a/pandas/core/groupby/generic.py +++ 
b/pandas/core/groupby/generic.py @@ -1111,7 +1111,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, lab = cut(Series(val), bins, include_lowest=True) lev = lab.cat.categories lab = lev.take(lab.cat.codes) - llab = lambda lab, inc: lab[inc]._multiindex.labels[-1] + llab = lambda lab, inc: lab[inc]._multiindex.codes[-1] if is_interval_dtype(lab): # TODO: should we do this inside II? diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 263de57d32f31..39d95d2b5d442 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3258,19 +3258,19 @@ def droplevel(self, level=0): # The two checks above guarantee that here self is a MultiIndex new_levels = list(self.levels) - new_labels = list(self.labels) + new_codes = list(self.codes) new_names = list(self.names) for i in levnums: new_levels.pop(i) - new_labels.pop(i) + new_codes.pop(i) new_names.pop(i) if len(new_levels) == 1: # set nan if needed - mask = new_labels[0] == -1 - result = new_levels[0].take(new_labels[0]) + mask = new_codes[0] == -1 + result = new_levels[0].take(new_codes[0]) if mask.any(): result = result.putmask(mask, np.nan) @@ -3278,7 +3278,7 @@ def droplevel(self, level=0): return result else: from .multi import MultiIndex - return MultiIndex(levels=new_levels, labels=new_labels, + return MultiIndex(levels=new_levels, labels=new_codes, names=new_names, verify_integrity=False) _index_shared_docs['get_indexer'] = """ @@ -4054,7 +4054,7 @@ def _get_leaf_sorter(labels): left_indexer = None join_index = left else: # sort the leaves - left_indexer = _get_leaf_sorter(left.labels[:level + 1]) + left_indexer = _get_leaf_sorter(left.codes[:level + 1]) join_index = left[left_indexer] else: @@ -4062,11 +4062,11 @@ def _get_leaf_sorter(labels): rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level)) - new_lev_labels = algos.take_nd(rev_indexer, left.labels[level], + new_lev_labels = algos.take_nd(rev_indexer, left.codes[level], 
allow_fill=False) - new_labels = list(left.labels) - new_labels[level] = new_lev_labels + new_codes = list(left.codes) + new_codes[level] = new_lev_labels new_levels = list(left.levels) new_levels[level] = new_level @@ -4075,7 +4075,7 @@ def _get_leaf_sorter(labels): left_indexer = np.arange(len(left), dtype=np.intp) mask = new_lev_labels != -1 if not mask.all(): - new_labels = [lab[mask] for lab in new_labels] + new_codes = [lab[mask] for lab in new_codes] left_indexer = left_indexer[mask] else: # tie out the order with other @@ -4086,31 +4086,31 @@ def _get_leaf_sorter(labels): # missing values are placed first; drop them! left_indexer = left_indexer[counts[0]:] - new_labels = [lab[left_indexer] for lab in new_labels] + new_codes = [lab[left_indexer] for lab in new_codes] else: # sort the leaves mask = new_lev_labels != -1 mask_all = mask.all() if not mask_all: - new_labels = [lab[mask] for lab in new_labels] + new_codes = [lab[mask] for lab in new_codes] - left_indexer = _get_leaf_sorter(new_labels[:level + 1]) - new_labels = [lab[left_indexer] for lab in new_labels] + left_indexer = _get_leaf_sorter(new_codes[:level + 1]) + new_codes = [lab[left_indexer] for lab in new_codes] # left_indexers are w.r.t masked frame. # reverse to original frame! 
if not mask_all: left_indexer = mask.nonzero()[0][left_indexer] - join_index = MultiIndex(levels=new_levels, labels=new_labels, + join_index = MultiIndex(levels=new_levels, labels=new_codes, names=left.names, verify_integrity=False) if right_lev_indexer is not None: right_indexer = algos.take_nd(right_lev_indexer, - join_index.labels[level], + join_index.codes[level], allow_fill=False) else: - right_indexer = join_index.labels[level] + right_indexer = join_index.codes[level] if flip_order: left_indexer, right_indexer = right_indexer, left_indexer diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 557f4266d1edc..32b7db5064587 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -269,13 +269,13 @@ def _verify_integrity(self, codes=None, levels=None): """ # NOTE: Currently does not check, among other things, that cached # nlevels matches nor that sortorder matches actually sortorder. - codes = codes or self.labels + codes = codes or self.codes levels = levels or self.levels if len(levels) != len(codes): raise ValueError("Length of levels and codes must match. 
NOTE:" " this index is in an inconsistent state.") - codes_length = len(self.labels[0]) + codes_length = len(self.codes[0]) for i, (level, level_codes) in enumerate(zip(levels, codes)): if len(level_codes) != codes_length: raise ValueError("Unequal code lengths: %s" % @@ -420,7 +420,7 @@ def set_levels(self, levels, level=None, inplace=False, return idx @property - def labels(self): + def codes(self): return self._codes def _set_codes(self, codes, level=None, copy=False, validate=True, @@ -554,12 +554,12 @@ def copy(self, names=None, dtype=None, levels=None, labels=None, if levels is None: levels = deepcopy(self.levels) if labels is None: - labels = deepcopy(self.labels) + labels = deepcopy(self.codes) else: if levels is None: levels = self.levels if labels is None: - labels = self.labels + labels = self.codes return MultiIndex(levels=levels, labels=labels, names=names, sortorder=self.sortorder, verify_integrity=False, _set_identity=_set_identity) @@ -640,7 +640,7 @@ def _nbytes(self, deep=False): objsize = 24 level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels) - label_nbytes = sum(i.nbytes for i in self.labels) + label_nbytes = sum(i.nbytes for i in self.codes) names_nbytes = sum(getsizeof(i, objsize) for i in self.names) result = level_nbytes + label_nbytes + names_nbytes @@ -671,7 +671,7 @@ def _format_data(self, name=None): return None def __len__(self): - return len(self.labels[0]) + return len(self.codes[0]) def _get_names(self): return FrozenList(level.name for level in self.levels) @@ -736,7 +736,7 @@ def _format_native_types(self, na_rep='nan', **kwargs): new_labels = [] # go through the levels and format them - for level, label in zip(self.levels, self.labels): + for level, label in zip(self.levels, self.codes): level = level._format_native_types(na_rep=na_rep, **kwargs) # add nan values, if there are any mask = (label == -1) @@ -759,7 +759,7 @@ def _format_native_types(self, na_rep='nan', **kwargs): 
@Appender(_index_shared_docs['_get_grouper_for_level']) def _get_grouper_for_level(self, mapper, level): - indexer = self.labels[level] + indexer = self.codes[level] level_index = self.levels[level] if mapper is not None: @@ -840,8 +840,8 @@ def _engine(self): # Check the total number of bits needed for our representation: if lev_bits[0] > 64: # The levels would overflow a 64 bit uint - use Python integers: - return MultiIndexPyIntEngine(self.levels, self.labels, offsets) - return MultiIndexUIntEngine(self.levels, self.labels, offsets) + return MultiIndexPyIntEngine(self.levels, self.codes, offsets) + return MultiIndexUIntEngine(self.levels, self.codes, offsets) @property def values(self): @@ -952,7 +952,7 @@ def duplicated(self, keep='first'): from pandas._libs.hashtable import duplicated_int64 shape = map(len, self.levels) - ids = get_group_index(self.labels, shape, sort=False, xnull=False) + ids = get_group_index(self.codes, shape, sort=False, xnull=False) return duplicated_int64(ids, keep) @@ -964,7 +964,7 @@ def fillna(self, value=None, downcast=None): @Appender(_index_shared_docs['dropna']) def dropna(self, how='any'): - nans = [label == -1 for label in self.labels] + nans = [label == -1 for label in self.codes] if how == 'any': indexer = np.any(nans, axis=0) elif how == 'all': @@ -972,7 +972,7 @@ def dropna(self, how='any'): else: raise ValueError("invalid how option: {0}".format(how)) - new_labels = [label[~indexer] for label in self.labels] + new_labels = [label[~indexer] for label in self.codes] return self.copy(labels=new_labels, deep=True) def get_value(self, series, key): @@ -1054,7 +1054,7 @@ def _get_level_values(self, level, unique=False): """ values = self.levels[level] - labels = self.labels[level] + labels = self.codes[level] if unique: labels = algos.unique(labels) filled = algos.take_1d(values._values, labels, @@ -1113,7 +1113,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, return [] stringified_levels = [] - for lev, 
lab in zip(self.levels, self.labels): + for lev, lab in zip(self.levels, self.codes): na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type) if len(lev) > 0: @@ -1253,7 +1253,7 @@ def to_hierarchical(self, n_repeat, n_shuffle=1): [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) """ levels = self.levels - labels = [np.repeat(x, n_repeat) for x in self.labels] + labels = [np.repeat(x, n_repeat) for x in self.codes] # Assumes that each label is divisible by n_shuffle labels = [x.reshape(n_shuffle, -1).ravel(order='F') for x in labels] names = self.names @@ -1280,7 +1280,7 @@ def lexsort_depth(self): else: return 0 - int64_labels = [ensure_int64(lab) for lab in self.labels] + int64_labels = [ensure_int64(lab) for lab in self.codes] for k in range(self.nlevels, 0, -1): if libalgos.is_lexsorted(int64_labels[:k]): return k @@ -1472,7 +1472,7 @@ def _sort_levels_monotonic(self): new_levels = [] new_labels = [] - for lev, lab in zip(self.levels, self.labels): + for lev, lab in zip(self.levels, self.codes): if not lev.is_monotonic: try: @@ -1534,7 +1534,7 @@ def remove_unused_levels(self): new_labels = [] changed = False - for lev, lab in zip(self.levels, self.labels): + for lev, lab in zip(self.levels, self.codes): # Since few levels are typically unused, bincount() is more # efficient than unique() - however it only accepts positive values @@ -1591,7 +1591,7 @@ def levshape(self): def __reduce__(self): """Necessary for making this object picklable""" d = dict(levels=[lev for lev in self.levels], - labels=[label for label in self.labels], + labels=[label for label in self.codes], sortorder=self.sortorder, names=list(self.names)) return ibase._new_Index, (self.__class__, d), None @@ -1621,7 +1621,7 @@ def __getitem__(self, key): key = com.cast_scalar_indexer(key) retval = [] - for lev, lab in zip(self.levels, self.labels): + for lev, lab in zip(self.levels, self.codes): if lab[key] == -1: retval.append(np.nan) else: @@ -1639,7 +1639,7 @@ def __getitem__(self, key): if 
isinstance(key, Index): key = np.asarray(key) - new_labels = [lab[key] for lab in self.labels] + new_labels = [lab[key] for lab in self.codes] return MultiIndex(levels=self.levels, labels=new_labels, names=self.names, sortorder=sortorder, @@ -1650,7 +1650,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) indices = ensure_platform_int(indices) - taken = self._assert_take_fillable(self.labels, indices, + taken = self._assert_take_fillable(self.codes, indices, allow_fill=allow_fill, fill_value=fill_value, na_value=-1) @@ -1666,7 +1666,7 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') raise ValueError(msg) - taken = [lab.take(indices) for lab in self.labels] + taken = [lab.take(indices) for lab in self.codes] mask = indices == -1 if mask.any(): masked = [] @@ -1676,7 +1676,7 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, masked.append(np.asarray(label_values)) taken = masked else: - taken = [lab.take(indices) for lab in self.labels] + taken = [lab.take(indices) for lab in self.codes] return taken def append(self, other): @@ -1719,7 +1719,7 @@ def repeat(self, repeats, *args, **kwargs): nv.validate_repeat(args, kwargs) return MultiIndex(levels=self.levels, labels=[label.view(np.ndarray).repeat(repeats) - for label in self.labels], names=self.names, + for label in self.codes], names=self.names, sortorder=self.sortorder, verify_integrity=False) def where(self, cond, other=None): @@ -1789,7 +1789,7 @@ def _drop_from_level(self, labels, level): index = self.levels[i] values = index.get_indexer(labels) - mask = ~algos.isin(self.labels[i], values) + mask = ~algos.isin(self.codes[i], values) return self[mask] @@ -1836,7 +1836,7 @@ def swaplevel(self, i=-2, j=-1): labels=[[0, 1, 0, 1], [0, 0, 1, 1]]) """ new_levels = list(self.levels) - new_labels = list(self.labels) + new_labels = 
list(self.codes) new_names = list(self.names) i = self._get_level_number(i) @@ -1862,7 +1862,7 @@ def reorder_levels(self, order): 'number of levels (%d), got %d' % (self.nlevels, len(order))) new_levels = [self.levels[i] for i in order] - new_labels = [self.labels[i] for i in order] + new_labels = [self.codes[i] for i in order] new_names = [self.names[i] for i in order] return MultiIndex(levels=new_levels, labels=new_labels, @@ -1886,7 +1886,7 @@ def cats(label): dtype=label.dtype) return [Categorical.from_codes(label, cats(label), ordered=True) - for label in self.labels] + for label in self.codes] def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ @@ -1924,13 +1924,13 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): raise ValueError("level must have same length as ascending") from pandas.core.sorting import lexsort_indexer - indexer = lexsort_indexer([self.labels[lev] for lev in level], + indexer = lexsort_indexer([self.codes[lev] for lev in level], orders=ascending) # level ordering else: - labels = list(self.labels) + labels = list(self.codes) shape = list(self.levshape) # partition labels and shape @@ -1950,7 +1950,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): indexer = indexer[::-1] indexer = ensure_platform_int(indexer) - new_labels = [lab.take(indexer) for lab in self.labels] + new_labels = [lab.take(indexer) for lab in self.codes] new_index = MultiIndex(labels=new_labels, levels=self.levels, names=self.names, sortorder=sortorder, @@ -2167,7 +2167,7 @@ def _partial_tup_index(self, tup, side='left'): n = len(tup) start, end = 0, len(self) - zipped = zip(tup, self.levels, self.labels) + zipped = zip(tup, self.levels, self.codes) for k, (lab, lev, labs) in enumerate(zipped): section = labs[start:end] @@ -2279,7 +2279,7 @@ def _maybe_to_slice(loc): loc = np.arange(start, stop, dtype='int64') for i, k in enumerate(follow_key, len(lead_key)): - mask = self.labels[i][loc] == 
self.levels[i].get_loc(k) + mask = self.codes[i][loc] == self.levels[i].get_loc(k) if not mask.all(): loc = loc[mask] if not len(loc): @@ -2430,7 +2430,7 @@ def _get_level_indexer(self, key, level=0, indexer=None): # if the indexer is provided, then use this level_index = self.levels[level] - labels = self.labels[level] + labels = self.codes[level] def convert_indexer(start, stop, step, indexer=indexer, labels=labels): # given the inputs and the labels/indexer, compute an indexer set @@ -2662,7 +2662,7 @@ def truncate(self, before=None, after=None): new_levels = list(self.levels) new_levels[0] = new_levels[0][i:j] - new_labels = [lab[left:right] for lab in self.labels] + new_labels = [lab[left:right] for lab in self.codes] new_labels[0] = new_labels[0] - i return MultiIndex(levels=new_levels, labels=new_labels, @@ -2694,12 +2694,12 @@ def equals(self, other): return False for i in range(self.nlevels): - slabels = self.labels[i] + slabels = self.codes[i] slabels = slabels[slabels != -1] svalues = algos.take_nd(np.asarray(self.levels[i]._values), slabels, allow_fill=False) - olabels = other.labels[i] + olabels = other.codes[i] olabels = olabels[olabels != -1] ovalues = algos.take_nd( np.asarray(other.levels[i]._values), @@ -2871,7 +2871,7 @@ def insert(self, loc, item): new_levels = [] new_labels = [] - for k, level, labels in zip(item, self.levels, self.labels): + for k, level, labels in zip(item, self.levels, self.codes): if k not in level: # have to insert into level # must insert at end otherwise you have to recompute all the @@ -2895,7 +2895,7 @@ def delete(self, loc): ------- new_index : MultiIndex """ - new_labels = [np.delete(lab, loc) for lab in self.labels] + new_labels = [np.delete(lab, loc) for lab in self.codes] return MultiIndex(levels=self.levels, labels=new_labels, names=self.names, verify_integrity=False) @@ -2912,7 +2912,7 @@ def isin(self, values, level=None): else: num = self._get_level_number(level) levs = self.levels[num] - labs = 
self.labels[num] + labs = self.codes[num] sought_labels = levs.isin(values).nonzero()[0] if levs.size == 0: diff --git a/pandas/core/panel.py b/pandas/core/panel.py index c878d16fac2e9..6a787e1b51c37 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -949,26 +949,26 @@ def to_frame(self, filter_observations=True): def construct_multi_parts(idx, n_repeat, n_shuffle=1): # Replicates and shuffles MultiIndex, returns individual attributes - labels = [np.repeat(x, n_repeat) for x in idx.labels] + codes = [np.repeat(x, n_repeat) for x in idx.codes] # Assumes that each label is divisible by n_shuffle - labels = [x.reshape(n_shuffle, -1).ravel(order='F') - for x in labels] - labels = [x[selector] for x in labels] + codes = [x.reshape(n_shuffle, -1).ravel(order='F') + for x in codes] + codes = [x[selector] for x in codes] levels = idx.levels names = idx.names - return labels, levels, names + return codes, levels, names def construct_index_parts(idx, major=True): levels = [idx] if major: - labels = [np.arange(N).repeat(K)[selector]] + codes = [np.arange(N).repeat(K)[selector]] names = idx.name or 'major' else: - labels = np.arange(K).reshape(1, K)[np.zeros(N, dtype=int)] - labels = [labels.ravel()[selector]] + codes = np.arange(K).reshape(1, K)[np.zeros(N, dtype=int)] + codes = [codes.ravel()[selector]] names = idx.name or 'minor' names = [names] - return labels, levels, names + return codes, levels, names if isinstance(self.major_axis, MultiIndex): major_labels, major_levels, major_names = construct_multi_parts( diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index f01c9d29fd457..cdd8a3cf33b41 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -555,9 +555,9 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): levels = [ensure_index(x) for x in levels] if not _all_indexes_same(indexes): - label_list = [] + codes_list = [] - # things are potentially different sizes, so compute the exact 
labels + # things are potentially different sizes, so compute the exact codes # for each level and pass those to MultiIndex.from_arrays for hlevel, level in zip(zipped, levels): @@ -570,18 +570,18 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): .format(key=key, level=level)) to_concat.append(np.repeat(i, len(index))) - label_list.append(np.concatenate(to_concat)) + codes_list.append(np.concatenate(to_concat)) concat_index = _concat_indexes(indexes) # these go at the end if isinstance(concat_index, MultiIndex): levels.extend(concat_index.levels) - label_list.extend(concat_index.labels) + codes_list.extend(concat_index.codes) else: codes, categories = _factorize_from_iterable(concat_index) levels.append(categories) - label_list.append(codes) + codes_list.append(codes) if len(names) == len(levels): names = list(names) @@ -594,7 +594,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): # also copies names = names + _get_consensus_names(indexes) - return MultiIndex(levels=levels, labels=label_list, names=names, + return MultiIndex(levels=levels, labels=codes_list, names=names, verify_integrity=False) new_index = indexes[0] @@ -605,8 +605,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): new_names = list(names) new_levels = list(levels) - # construct labels - new_labels = [] + # construct codes + new_codes = [] # do something a bit more speedy @@ -619,17 +619,17 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): raise ValueError('Values not found in passed level: {hlevel!s}' .format(hlevel=hlevel[mask])) - new_labels.append(np.repeat(mapped, n)) + new_codes.append(np.repeat(mapped, n)) if isinstance(new_index, MultiIndex): new_levels.extend(new_index.levels) - new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels]) + new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes]) else: new_levels.append(new_index) - new_labels.append(np.tile(np.arange(n), 
kpieces)) + new_codes.append(np.tile(np.arange(n), kpieces)) if len(new_names) < len(new_levels): new_names.extend(new_index.names) - return MultiIndex(levels=new_levels, labels=new_labels, names=new_names, + return MultiIndex(levels=new_levels, labels=new_codes, names=new_names, verify_integrity=False) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e09cf0a527ff9..8ac5953a72312 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -859,9 +859,9 @@ def _get_merge_keys(self): left_keys.append(left._get_label_or_level_values(k)) join_names.append(k) if isinstance(self.right.index, MultiIndex): - right_keys = [lev._values.take(lab) - for lev, lab in zip(self.right.index.levels, - self.right.index.labels)] + right_keys = [lev._values.take(lev_codes) for lev, lev_codes + in zip(self.right.index.levels, + self.right.index.codes)] else: right_keys = [self.right.index.values] elif _any(self.right_on): @@ -873,9 +873,9 @@ def _get_merge_keys(self): right_keys.append(right._get_label_or_level_values(k)) join_names.append(k) if isinstance(self.left.index, MultiIndex): - left_keys = [lev._values.take(lab) - for lev, lab in zip(self.left.index.levels, - self.left.index.labels)] + left_keys = [lev._values.take(lev_codes) for lev, lev_codes + in zip(self.left.index.levels, + self.left.index.codes)] else: left_keys = [self.left.index.values] @@ -1421,27 +1421,29 @@ def _get_multiindex_indexer(join_keys, index, sort): fkeys = partial(_factorize_keys, sort=sort) # left & right join labels and num. 
of levels at each location - rlab, llab, shape = map(list, zip(* map(fkeys, index.levels, join_keys))) + rcodes, lcodes, shape = map(list, zip(* map(fkeys, + index.levels, + join_keys))) if sort: - rlab = list(map(np.take, rlab, index.labels)) + rcodes = list(map(np.take, rcodes, index.codes)) else: i8copy = lambda a: a.astype('i8', subok=False, copy=True) - rlab = list(map(i8copy, index.labels)) + rcodes = list(map(i8copy, index.codes)) # fix right labels if there were any nulls for i in range(len(join_keys)): - mask = index.labels[i] == -1 + mask = index.codes[i] == -1 if mask.any(): # check if there already was any nulls at this location # if there was, it is factorized to `shape[i] - 1` - a = join_keys[i][llab[i] == shape[i] - 1] + a = join_keys[i][lcodes[i] == shape[i] - 1] if a.size == 0 or not a[0] != a[0]: shape[i] += 1 - rlab[i][mask] = shape[i] - 1 + rcodes[i][mask] = shape[i] - 1 # get flat i8 join keys - lkey, rkey = _get_join_keys(llab, rlab, shape, sort) + lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort) # factorize keys to a dense i8 space lkey, rkey, count = fkeys(lkey, rkey) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 065728fb239ae..c53877af8514e 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -100,7 +100,7 @@ def __init__(self, values, index, level=-1, value_columns=None, self.level = self.index._get_level_number(level) # when index includes `nan`, need to lift levels/strides by 1 - self.lift = 1 if -1 in self.index.labels[self.level] else 0 + self.lift = 1 if -1 in self.index.codes[self.level] else 0 self.new_index_levels = list(self.index.levels) self.new_index_names = list(self.index.names) @@ -115,9 +115,9 @@ def __init__(self, values, index, level=-1, value_columns=None, def _make_sorted_values_labels(self): v = self.level - labs = list(self.index.labels) + codes = list(self.index.codes) levs = list(self.index.levels) - to_sort = labs[:v] + labs[v + 1:] + 
[labs[v]] + to_sort = codes[:v] + codes[v + 1:] + [codes[v]] sizes = [len(x) for x in levs[:v] + levs[v + 1:] + [levs[v]]] comp_index, obs_ids = get_compressed_ids(to_sort, sizes) @@ -243,16 +243,16 @@ def get_new_columns(self): new_levels = self.value_columns.levels + (self.removed_level_full,) new_names = self.value_columns.names + (self.removed_name,) - new_labels = [lab.take(propagator) - for lab in self.value_columns.labels] + new_codes = [lab.take(propagator) + for lab in self.value_columns.codes] else: new_levels = [self.value_columns, self.removed_level_full] new_names = [self.value_columns.name, self.removed_name] - new_labels = [propagator] + new_codes = [propagator] # The two indices differ only if the unstacked level had unused items: if len(self.removed_level_full) != len(self.removed_level): - # In this case, we remap the new labels to the original level: + # In this case, we remap the new codes to the original level: repeater = self.removed_level_full.get_indexer(self.removed_level) if self.lift: repeater = np.insert(repeater, 0, -1) @@ -261,22 +261,22 @@ def get_new_columns(self): repeater = np.arange(stride) - self.lift # The entire level is then just a repetition of the single chunk: - new_labels.append(np.tile(repeater, width)) - return MultiIndex(levels=new_levels, labels=new_labels, + new_codes.append(np.tile(repeater, width)) + return MultiIndex(levels=new_levels, labels=new_codes, names=new_names, verify_integrity=False) def get_new_index(self): - result_labels = [lab.take(self.compressor) - for lab in self.sorted_labels[:-1]] + result_codes = [lab.take(self.compressor) + for lab in self.sorted_labels[:-1]] # construct the new index if len(self.new_index_levels) == 1: - lev, lab = self.new_index_levels[0], result_labels[0] + lev, lab = self.new_index_levels[0], result_codes[0] if (lab == -1).any(): lev = lev.insert(len(lev), lev._na_value) return lev.take(lab) - return MultiIndex(levels=self.new_index_levels, labels=result_labels, + return 
MultiIndex(levels=self.new_index_levels, labels=result_codes, names=self.new_index_names, verify_integrity=False) @@ -293,17 +293,17 @@ def _unstack_multiple(data, clocs, fill_value=None): rlocs = [i for i in range(index.nlevels) if i not in clocs] clevels = [index.levels[i] for i in clocs] - clabels = [index.labels[i] for i in clocs] + ccodes = [index.codes[i] for i in clocs] cnames = [index.names[i] for i in clocs] rlevels = [index.levels[i] for i in rlocs] - rlabels = [index.labels[i] for i in rlocs] + rcodes = [index.codes[i] for i in rlocs] rnames = [index.names[i] for i in rlocs] shape = [len(x) for x in clevels] - group_index = get_group_index(clabels, shape, sort=False, xnull=False) + group_index = get_group_index(ccodes, shape, sort=False, xnull=False) comp_ids, obs_ids = compress_group_index(group_index, sort=False) - recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels, + recons_codes = decons_obs_group_ids(comp_ids, obs_ids, shape, ccodes, xnull=False) if rlocs == []: @@ -311,7 +311,7 @@ def _unstack_multiple(data, clocs, fill_value=None): dummy_index = Index(obs_ids, name='__placeholder__') else: dummy_index = MultiIndex(levels=rlevels + [obs_ids], - labels=rlabels + [comp_ids], + labels=rcodes + [comp_ids], names=rnames + ['__placeholder__'], verify_integrity=False) @@ -322,7 +322,7 @@ def _unstack_multiple(data, clocs, fill_value=None): unstacked = dummy.unstack('__placeholder__', fill_value=fill_value) new_levels = clevels new_names = cnames - new_labels = recons_labels + new_codes = recons_codes else: if isinstance(data.columns, MultiIndex): result = data @@ -344,11 +344,11 @@ def _unstack_multiple(data, clocs, fill_value=None): new_levels = [unstcols.levels[0]] + clevels new_names = [data.columns.name] + cnames - new_labels = [unstcols.labels[0]] - for rec in recons_labels: - new_labels.append(rec.take(unstcols.labels[-1])) + new_codes = [unstcols.codes[0]] + for rec in recons_codes: + 
new_codes.append(rec.take(unstcols.codes[-1])) - new_columns = MultiIndex(levels=new_levels, labels=new_labels, + new_columns = MultiIndex(levels=new_levels, labels=new_codes, names=new_names, verify_integrity=False) if isinstance(unstacked, Series): @@ -467,21 +467,21 @@ def factorize(index): return _stack_multi_columns(frame, level_num=level_num, dropna=dropna) elif isinstance(frame.index, MultiIndex): new_levels = list(frame.index.levels) - new_labels = [lab.repeat(K) for lab in frame.index.labels] + new_codes = [lab.repeat(K) for lab in frame.index.codes] clev, clab = factorize(frame.columns) new_levels.append(clev) - new_labels.append(np.tile(clab, N).ravel()) + new_codes.append(np.tile(clab, N).ravel()) new_names = list(frame.index.names) new_names.append(frame.columns.name) - new_index = MultiIndex(levels=new_levels, labels=new_labels, + new_index = MultiIndex(levels=new_levels, labels=new_codes, names=new_names, verify_integrity=False) else: levels, (ilab, clab) = zip(*map(factorize, (frame.index, frame.columns))) - labels = ilab.repeat(K), np.tile(clab, N).ravel() - new_index = MultiIndex(levels=levels, labels=labels, + codes = ilab.repeat(K), np.tile(clab, N).ravel() + new_index = MultiIndex(levels=levels, labels=codes, names=[frame.index.name, frame.columns.name], verify_integrity=False) @@ -592,9 +592,9 @@ def _convert_level_number(level_num, columns): # tuple list excluding level for grouping columns if len(frame.columns.levels) > 2: - tuples = list(zip(*[lev.take(lab) - for lev, lab in zip(this.columns.levels[:-1], - this.columns.labels[:-1])])) + tuples = list(zip(*[lev.take(level_codes) for lev, level_codes + in zip(this.columns.levels[:-1], + this.columns.codes[:-1])])) unique_groups = [key for key, _ in itertools.groupby(tuples)] new_names = this.columns.names[:-1] new_columns = MultiIndex.from_tuples(unique_groups, names=new_names) @@ -604,9 +604,9 @@ def _convert_level_number(level_num, columns): # time to ravel the values new_data = {} 
level_vals = this.columns.levels[-1] - level_labels = sorted(set(this.columns.labels[-1])) - level_vals_used = level_vals[level_labels] - levsize = len(level_labels) + level_codes = sorted(set(this.columns.codes[-1])) + level_vals_used = level_vals[level_codes] + levsize = len(level_codes) drop_cols = [] for key in unique_groups: try: @@ -625,8 +625,8 @@ def _convert_level_number(level_num, columns): slice_len = loc.stop - loc.start if slice_len != levsize: - chunk = this[this.columns[loc]] - chunk.columns = level_vals.take(chunk.columns.labels[-1]) + chunk = this.loc[:, this.columns[loc]] + chunk.columns = level_vals.take(chunk.columns.codes[-1]) value_slice = chunk.reindex(columns=level_vals_used).values else: if (frame._is_homogeneous_type and @@ -660,17 +660,17 @@ def _convert_level_number(level_num, columns): if isinstance(this.index, MultiIndex): new_levels = list(this.index.levels) new_names = list(this.index.names) - new_labels = [lab.repeat(levsize) for lab in this.index.labels] + new_codes = [lab.repeat(levsize) for lab in this.index.codes] else: new_levels = [this.index] - new_labels = [np.arange(N).repeat(levsize)] + new_codes = [np.arange(N).repeat(levsize)] new_names = [this.index.name] # something better? 
new_levels.append(level_vals) - new_labels.append(np.tile(level_labels, N)) + new_codes.append(np.tile(level_codes, N)) new_names.append(frame.columns.names[level_num]) - new_index = MultiIndex(levels=new_levels, labels=new_labels, + new_index = MultiIndex(levels=new_levels, labels=new_codes, names=new_names, verify_integrity=False) result = frame._constructor(new_data, index=new_index, columns=new_columns) @@ -979,13 +979,13 @@ def make_axis_dummies(frame, axis='minor', transform=None): num = numbers.get(axis, axis) items = frame.index.levels[num] - labels = frame.index.labels[num] + codes = frame.index.codes[num] if transform is not None: mapped_items = items.map(transform) - labels, items = _factorize_from_iterable(mapped_items.take(labels)) + codes, items = _factorize_from_iterable(mapped_items.take(codes)) values = np.eye(len(items), dtype=float) - values = values.take(labels, axis=0) + values = values.take(codes, axis=0) return DataFrame(values, columns=items, index=frame.index) diff --git a/pandas/core/series.py b/pandas/core/series.py index 20e4720a3bde7..ab73c5983d5e2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1410,14 +1410,14 @@ def count(self, level=None): level = self.index._get_level_number(level) lev = self.index.levels[level] - lab = np.array(self.index.labels[level], subok=False, copy=True) + level_codes = np.array(self.index.codes[level], subok=False, copy=True) - mask = lab == -1 + mask = level_codes == -1 if mask.any(): - lab[mask] = cnt = len(lev) + level_codes[mask] = cnt = len(lev) lev = lev.insert(cnt, lev._na_value) - obs = lab[notna(self.values)] + obs = level_codes[notna(self.values)] out = np.bincount(obs, minlength=len(lev) or None) return self._constructor(out, index=lev, dtype='int64').__finalize__(self) diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 6a2cfd4d4a7b3..dd17b3d3af38d 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -148,7 +148,7 @@ def 
hash_tuples(vals, encoding='utf8', hash_key=None): vals = MultiIndex.from_tuples(vals) # create a list-of-Categoricals - vals = [Categorical(vals.labels[level], + vals = [Categorical(vals.codes[level], vals.levels[level], ordered=False, fastpath=True) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index c2ea3715b9f3b..d74722996a660 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -431,9 +431,9 @@ def _format_header_mi(self): name = columns.names[lnum] yield ExcelCell(lnum, coloffset, name, self.header_style) - for lnum, (spans, levels, labels) in enumerate(zip( - level_lengths, columns.levels, columns.labels)): - values = levels.take(labels) + for lnum, (spans, levels, level_codes) in enumerate(zip( + level_lengths, columns.levels, columns.codes)): + values = levels.take(level_codes) for i in spans: if spans[i] > 1: yield ExcelCell(lnum, coloffset + i + 1, values[i], @@ -574,11 +574,11 @@ def _format_hierarchical_rows(self): names=False) level_lengths = get_level_lengths(level_strs) - for spans, levels, labels in zip(level_lengths, - self.df.index.levels, - self.df.index.labels): + for spans, levels, level_codes in zip(level_lengths, + self.df.index.levels, + self.df.index.codes): - values = levels.take(labels, + values = levels.take(level_codes, allow_fill=levels._can_hold_na, fill_value=True) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 41e14e482d061..d986b050bb6d7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2607,9 +2607,9 @@ def read_sparse_intindex(self, key, **kwargs): def write_multi_index(self, key, index): setattr(self.attrs, '%s_nlevels' % key, index.nlevels) - for i, (lev, lab, name) in enumerate(zip(index.levels, - index.labels, - index.names)): + for i, (lev, level_codes, name) in enumerate(zip(index.levels, + index.codes, + index.names)): # write the level level_key = '%s_level%d' % (key, i) conv_level = _convert_index(lev, self.encoding, self.errors, @@ -2624,7 
+2624,7 @@ def write_multi_index(self, key, index): # write the labels label_key = '%s_label%d' % (key, i) - self.write_array(label_key, lab) + self.write_array(label_key, level_codes) def read_multi_index(self, key, **kwargs): nlevels = getattr(self.attrs, '%s_nlevels' % key) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 2b4d1e6f25c65..f9b60ba347bfe 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -748,9 +748,9 @@ def test_reset_index(self, float_frame): names = ['first', 'second'] stacked.index.names = names deleveled = stacked.reset_index() - for i, (lev, lab) in enumerate(zip(stacked.index.levels, - stacked.index.labels)): - values = lev.take(lab) + for i, (lev, level_codes) in enumerate(zip(stacked.index.levels, + stacked.index.codes)): + values = lev.take(level_codes) name = names[i] tm.assert_index_equal(values, Index(deleveled[name])) @@ -1084,7 +1084,7 @@ def test_rename_axis_style_raises(self): df.rename(id, mapper=id) def test_reindex_api_equivalence(self): - # equivalence of the labels/axis and index/columns API's + # equivalence of the labels/axis and index/columns API's df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], index=['a', 'b', 'c'], columns=['d', 'e', 'f']) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e92e5a70b263f..2b1f1cac2016f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -76,7 +76,7 @@ def test_basic(dtype): def test_groupby_nonobject_dtype(mframe, df_mixed_floats): - key = mframe.index.labels[0] + key = mframe.index.codes[0] grouped = mframe.groupby(key) result = grouped.sum() diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py index 70d79ddfdc22e..cc7b48069b354 100644 --- a/pandas/tests/indexes/multi/test_astype.py +++ b/pandas/tests/indexes/multi/test_astype.py @@ -11,7 +11,7 @@ def 
test_astype(idx): expected = idx.copy() actual = idx.astype('O') assert_copy(actual.levels, expected.levels) - assert_copy(actual.labels, expected.labels) + assert_copy(actual.codes, expected.codes) assert [level.name for level in actual.levels] == list(expected.names) with pytest.raises(TypeError, match="^Setting.*dtype.*object"): diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index bb9b2877d519c..840267198c4f5 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -87,13 +87,13 @@ def test_constructor_mismatched_label_levels(idx): def test_copy_in_constructor(): levels = np.array(["a", "b", "c"]) - labels = np.array([1, 1, 2, 0, 0, 1, 1]) - val = labels[0] - mi = MultiIndex(levels=[levels, levels], labels=[labels, labels], + codes = np.array([1, 1, 2, 0, 0, 1, 1]) + val = codes[0] + mi = MultiIndex(levels=[levels, levels], labels=[codes, codes], copy=True) - assert mi.labels[0][0] == val - labels[0] = 15 - assert mi.labels[0][0] == val + assert mi.codes[0][0] == val + codes[0] = 15 + assert mi.codes[0][0] == val val = levels[0] levels[0] = "PANDA" assert mi.levels[0][0] == val @@ -101,8 +101,8 @@ def test_copy_in_constructor(): def test_from_arrays(idx): arrays = [] - for lev, lab in zip(idx.levels, idx.labels): - arrays.append(np.asarray(lev).take(lab)) + for lev, level_codes in zip(idx.levels, idx.codes): + arrays.append(np.asarray(lev).take(level_codes)) # list of arrays as input result = MultiIndex.from_arrays(arrays, names=idx.names) @@ -118,8 +118,8 @@ def test_from_arrays(idx): def test_from_arrays_iterator(idx): # GH 18434 arrays = [] - for lev, lab in zip(idx.levels, idx.labels): - arrays.append(np.asarray(lev).take(lab)) + for lev, level_codes in zip(idx.levels, idx.codes): + arrays.append(np.asarray(lev).take(level_codes)) # iterator as input result = MultiIndex.from_arrays(iter(arrays), names=idx.names) diff --git 
a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 0d09e3ef2e4b1..243df96070d4e 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -11,11 +11,11 @@ def assert_multiindex_copied(copy, original): # Levels should be (at least, shallow copied) tm.assert_copy(copy.levels, original.levels) - tm.assert_almost_equal(copy.labels, original.labels) + tm.assert_almost_equal(copy.codes, original.codes) # Labels doesn't matter which way copied - tm.assert_almost_equal(copy.labels, original.labels) - assert copy.labels is not original.labels + tm.assert_almost_equal(copy.codes, original.codes) + assert copy.codes is not original.codes # Names doesn't matter which way copied assert copy.names == original.names @@ -79,7 +79,7 @@ def test_copy_method_kwargs(deep, kwarg, value): labels=[[0, 0, 0, 1], [0, 0, 1, 1]], names=['first', 'second'] ) - + return idx_copy = idx.copy(**{kwarg: value, 'deep': deep}) if kwarg == 'names': assert getattr(idx_copy, kwarg) == value diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index bd1f313897ea2..3eb8024cb01dd 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -102,7 +102,7 @@ def test_equals_multi(idx): lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) + index2 = MultiIndex(levels=index.levels[:-1], labels=index.codes[:-1]) assert not index.equals(index2) assert not index.equal_levels(index2) @@ -110,11 +110,11 @@ def test_equals_multi(idx): major_axis = Index(lrange(4)) minor_axis = Index(lrange(2)) - major_labels = np.array([0, 0, 1, 2, 2, 3]) - minor_labels = np.array([0, 1, 0, 0, 1, 0]) + major_codes = np.array([0, 0, 1, 2, 2, 3]) + minor_codes = np.array([0, 1, 0, 0, 1, 0]) 
index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) + labels=[major_codes, minor_codes]) assert not idx.equals(index) assert not idx.equal_levels(index) @@ -122,11 +122,11 @@ def test_equals_multi(idx): major_axis = Index(['foo', 'bar', 'baz', 'qux']) minor_axis = Index(['one', 'two']) - major_labels = np.array([0, 0, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) + major_codes = np.array([0, 0, 2, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) + labels=[major_codes, minor_codes]) assert not idx.equals(index) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index d534ac717cbee..33f0540905ca4 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -151,23 +151,23 @@ def test_set_name_methods(idx, index_names): assert ind.names == new_names2 -def test_set_levels_labels_directly(idx): - # setting levels/labels directly raises AttributeError +def test_set_levels_codes_directly(idx): + # setting levels/codes directly raises AttributeError levels = idx.levels new_levels = [[lev + 'a' for lev in level] for level in levels] - labels = idx.labels - major_labels, minor_labels = labels - major_labels = [(x + 1) % 3 for x in major_labels] - minor_labels = [(x + 1) % 1 for x in minor_labels] - new_labels = [major_labels, minor_labels] + codes = idx.codes + major_codes, minor_codes = codes + major_codes = [(x + 1) % 3 for x in major_codes] + minor_codes = [(x + 1) % 1 for x in minor_codes] + new_codes = [major_codes, minor_codes] with pytest.raises(AttributeError): idx.levels = new_levels with pytest.raises(AttributeError): - idx.labels = new_labels + idx.codes = new_codes def test_set_levels(idx): @@ -231,16 +231,10 @@ def test_set_levels(idx): assert_matching(idx.levels, original_index.levels, check_dtype=True) -<<<<<<< HEAD - 
with pytest.raises(ValueError, match="^On"): - idx.set_labels([0, 1, 2, 3, 4, 5], level=0, - inplace=inplace) -======= - with tm.assert_raises_regex(ValueError, "^On"): + with pytest.raises(ValueError, "^On"): idx.set_codes([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) ->>>>>>> MultiIndex.set_labels -> set_codes - assert_matching(idx.labels, original_index.labels, + assert_matching(idx.codes, original_index.codes, check_dtype=True) with pytest.raises(TypeError, match="^Levels"): @@ -248,21 +242,16 @@ def test_set_levels(idx): assert_matching(idx.levels, original_index.levels, check_dtype=True) -<<<<<<< HEAD - with pytest.raises(TypeError, match="^Labels"): - idx.set_labels(1, level=0, inplace=inplace) -======= - with tm.assert_raises_regex(TypeError, "^Codes"): + with pytest.raises(TypeError, "^Codes"): idx.set_codes(1, level=0, inplace=inplace) ->>>>>>> MultiIndex.set_labels -> set_codes - assert_matching(idx.labels, original_index.labels, + assert_matching(idx.codes, original_index.codes, check_dtype=True) def test_set_codes(idx): # side note - you probably wouldn't want to use levels and codes # directly like this - but it is possible. 
- codes = idx.labels + codes = idx.codes major_codes, minor_codes = codes major_codes = [(x + 1) % 3 for x in major_codes] minor_codes = [(x + 1) % 1 for x in minor_codes] @@ -270,49 +259,49 @@ def test_set_codes(idx): # changing codes w/o mutation ind2 = idx.set_codes(new_codes) - assert_matching(ind2.labels, new_codes) - assert_matching(idx.labels, codes) + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) # changing label w/ mutation ind2 = idx.copy() inplace_return = ind2.set_codes(new_codes, inplace=True) assert inplace_return is None - assert_matching(ind2.labels, new_codes) + assert_matching(ind2.codes, new_codes) # codes changing specific level w/o mutation ind2 = idx.set_codes(new_codes[0], level=0) - assert_matching(ind2.labels, [new_codes[0], codes[1]]) - assert_matching(idx.labels, codes) + assert_matching(ind2.codes, [new_codes[0], codes[1]]) + assert_matching(idx.codes, codes) ind2 = idx.set_codes(new_codes[1], level=1) - assert_matching(ind2.labels, [codes[0], new_codes[1]]) - assert_matching(idx.labels, codes) + assert_matching(ind2.codes, [codes[0], new_codes[1]]) + assert_matching(idx.codes, codes) # codes changing multiple levels w/o mutation ind2 = idx.set_codes(new_codes, level=[0, 1]) - assert_matching(ind2.labels, new_codes) - assert_matching(idx.labels, codes) + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) # label changing specific level w/ mutation ind2 = idx.copy() inplace_return = ind2.set_codes(new_codes[0], level=0, inplace=True) assert inplace_return is None - assert_matching(ind2.labels, [new_codes[0], codes[1]]) - assert_matching(idx.labels, codes) + assert_matching(ind2.codes, [new_codes[0], codes[1]]) + assert_matching(idx.codes, codes) ind2 = idx.copy() inplace_return = ind2.set_codes(new_codes[1], level=1, inplace=True) assert inplace_return is None - assert_matching(ind2.labels, [codes[0], new_codes[1]]) - assert_matching(idx.labels, codes) + assert_matching(ind2.codes, 
[codes[0], new_codes[1]]) + assert_matching(idx.codes, codes) # codes changing multiple levels [w/ mutation] ind2 = idx.copy() inplace_return = ind2.set_codes(new_codes, level=[0, 1], inplace=True) assert inplace_return is None - assert_matching(ind2.labels, new_codes) - assert_matching(idx.labels, codes) + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) # label changing for levels of different magnitude of categories ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) @@ -352,7 +341,7 @@ def test_set_labels_deprecated(): def test_set_levels_codes_names_bad_input(idx): - levels, codes = idx.levels, idx.labels + levels, codes = idx.levels, idx.codes names = idx.names with pytest.raises(ValueError, match='Length of levels'): @@ -369,13 +358,8 @@ def test_set_levels_codes_names_bad_input(idx): idx.set_levels(levels[0]) # shouldn't scalar data error, instead should demand list-like -<<<<<<< HEAD - with pytest.raises(TypeError, match='list of lists-like'): - idx.set_labels(labels[0]) -======= with tm.assert_raises_regex(TypeError, 'list of lists-like'): idx.set_codes(codes[0]) ->>>>>>> MultiIndex.set_labels -> set_codes # shouldn't scalar data error, instead should demand list-like with pytest.raises(TypeError, match='list-like'): @@ -389,19 +373,11 @@ def test_set_levels_codes_names_bad_input(idx): idx.set_levels(levels, level=0) # should have equal lengths -<<<<<<< HEAD - with pytest.raises(TypeError, match='list of lists-like'): - idx.set_labels(labels[0], level=[0, 1]) - - with pytest.raises(TypeError, match='list-like'): - idx.set_labels(labels, level=0) -======= - with tm.assert_raises_regex(TypeError, 'list of lists-like'): + with pytest.raises(TypeError, 'list of lists-like'): idx.set_codes(codes[0], level=[0, 1]) - with tm.assert_raises_regex(TypeError, 'list-like'): + with pytest.raises(TypeError, 'list-like'): idx.set_codes(codes, level=0) ->>>>>>> MultiIndex.set_labels -> set_codes # should have equal lengths with 
pytest.raises(ValueError, match='Length of names'): @@ -436,7 +412,7 @@ def test_set_levels_categorical(ordered): cidx = CategoricalIndex(list("bac"), ordered=ordered) result = index.set_levels(cidx, 0) expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], - labels=index.labels) + labels=index.codes) tm.assert_index_equal(result, expected) result_lvl = result.get_level_values(0) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 2ec08fa89d133..5e21f8ede0249 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -16,19 +16,19 @@ def test_labels_dtypes(): # GH 8456 i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - assert i.labels[0].dtype == 'int8' - assert i.labels[1].dtype == 'int8' + assert i.codes[0].dtype == 'int8' + assert i.codes[1].dtype == 'int8' i = MultiIndex.from_product([['a'], range(40)]) - assert i.labels[1].dtype == 'int8' + assert i.codes[1].dtype == 'int8' i = MultiIndex.from_product([['a'], range(400)]) - assert i.labels[1].dtype == 'int16' + assert i.codes[1].dtype == 'int16' i = MultiIndex.from_product([['a'], range(40000)]) - assert i.labels[1].dtype == 'int32' + assert i.codes[1].dtype == 'int32' i = pd.MultiIndex.from_product([['a'], range(1000)]) - assert (i.labels[0] >= 0).all() - assert (i.labels[1] >= 0).all() + assert (i.codes[0] >= 0).all() + assert (i.codes[1] >= 0).all() def test_values_boxed(): @@ -194,7 +194,7 @@ def test_can_hold_identifiers(idx): def test_metadata_immutable(idx): - levels, labels = idx.levels, idx.labels + levels, codes = idx.levels, idx.codes # shouldn't be able to set at either the top level or base level mutable_regex = re.compile('does not support mutable operations') with pytest.raises(TypeError, match=mutable_regex): @@ -202,10 +202,10 @@ def test_metadata_immutable(idx): with pytest.raises(TypeError, match=mutable_regex): levels[0][0] = levels[0][0] # ditto for labels - with 
pytest.raises(TypeError, match=mutable_regex): - labels[0] = labels[0] - with pytest.raises(TypeError, match=mutable_regex): - labels[0][0] = labels[0][0] + with pytest.raises(TypeError, mutable_regex): + codes[0] = codes[0] + with pytest.raises(TypeError, mutable_regex): + codes[0][0] = codes[0][0] # and for names names = idx.names with pytest.raises(TypeError, match=mutable_regex): diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py index 1f63f1ef100c1..ac7e379c6cd42 100644 --- a/pandas/tests/indexes/multi/test_names.py +++ b/pandas/tests/indexes/multi/test_names.py @@ -99,14 +99,14 @@ def test_names(idx, index_names): # initializing with bad names (should always be equivalent) major_axis, minor_axis = idx.levels - major_labels, minor_labels = idx.labels - with pytest.raises(ValueError, match="^Length of names"): + major_codes, minor_codes = idx.codes + with pytest.raises(ValueError, "^Length of names"): MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], + labels=[major_codes, minor_codes], names=['first']) - with pytest.raises(ValueError, match="^Length of names"): + with pytest.raises(ValueError, "^Length of names"): MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], + labels=[major_codes, minor_codes], names=['first', 'second', 'third']) # names are assigned diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index ec250f477f804..d8bce5b8748f9 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -664,7 +664,7 @@ def test_getitem_partial(self): ymd = self.ymd.T result = ymd[2000, 2] - expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1]) + expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1]) expected.columns = expected.columns.droplevel(0).droplevel(0) tm.assert_frame_equal(result, expected) @@ -701,8 +701,8 @@ def test_fancy_slice_partial(self): 
tm.assert_frame_equal(result, expected) result = self.ymd.loc[(2000, 2):(2000, 4)] - lev = self.ymd.index.labels[1] - expected = self.ymd[(lev >= 1) & (lev <= 3)] + level_codes = self.ymd.index.codes[1] + expected = self.ymd[(level_codes >= 1) & (level_codes <= 3)] tm.assert_frame_equal(result, expected) def test_getitem_partial_column_select(self): @@ -1198,9 +1198,9 @@ def test_unstack_sparse_keyspace(self): def test_unstack_unobserved_keys(self): # related to #2278 refactoring levels = [[0, 1], [0, 1, 2, 3]] - labels = [[0, 0, 1, 1], [0, 2, 0, 2]] + codes = [[0, 0, 1, 1], [0, 2, 0, 2]] - index = MultiIndex(levels, labels) + index = MultiIndex(levels, codes) df = DataFrame(np.random.randn(4, 2), index=index) @@ -1575,11 +1575,11 @@ def test_unstack_preserve_types(self): assert unstacked['F', 1].dtype == np.float64 def test_unstack_group_index_overflow(self): - labels = np.tile(np.arange(500), 2) + codes = np.tile(np.arange(500), 2) level = np.arange(500) index = MultiIndex(levels=[level] * 8 + [[0, 1]], - labels=[labels] * 8 + [np.arange(2).repeat(500)]) + labels=[codes] * 8 + [np.arange(2).repeat(500)]) s = Series(np.arange(1000), index=index) result = s.unstack() @@ -1591,7 +1591,7 @@ def test_unstack_group_index_overflow(self): # put it at beginning index = MultiIndex(levels=[[0, 1]] + [level] * 8, - labels=[np.arange(2).repeat(500)] + [labels] * 8) + labels=[np.arange(2).repeat(500)] + [codes] * 8) s = Series(np.arange(1000), index=index) result = s.unstack(0) @@ -1599,8 +1599,8 @@ def test_unstack_group_index_overflow(self): # put it in middle index = MultiIndex(levels=[level] * 4 + [[0, 1]] + [level] * 4, - labels=([labels] * 4 + [np.arange(2).repeat(500)] + - [labels] * 4)) + labels=([codes] * 4 + [np.arange(2).repeat(500)] + + [codes] * 4)) s = Series(np.arange(1000), index=index) result = s.unstack(4) @@ -1955,8 +1955,8 @@ def test_drop_preserve_names(self): def test_unicode_repr_issues(self): levels = [Index([u('a/\u03c3'), u('b/\u03c3'), 
u('c/\u03c3')]), Index([0, 1])] - labels = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] - index = MultiIndex(levels=levels, labels=labels) + codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] + index = MultiIndex(levels=levels, labels=codes) repr(index.levels) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index bc644071e914f..3cd169ee56abd 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2470,10 +2470,10 @@ def is_sorted(arr): return (arr[1:] > arr[:-1]).any() sorted_minor = self.panel.sort_index(level=1) - assert is_sorted(sorted_minor.index.labels[1]) + assert is_sorted(sorted_minor.index.codes[1]) sorted_major = sorted_minor.sort_index(level=0) - assert is_sorted(sorted_major.index.labels[0]) + assert is_sorted(sorted_major.index.codes[0]) def test_to_string(self): buf = StringIO() @@ -2545,7 +2545,7 @@ def test_axis_dummies(self): def test_get_dummies(self): from pandas.core.reshape.reshape import get_dummies, make_axis_dummies - self.panel['Label'] = self.panel.index.labels[1] + self.panel['Label'] = self.panel.index.codes[1] minor_dummies = make_axis_dummies(self.panel, 'minor').astype(np.uint8) dummies = get_dummies(self.panel['Label']) tm.assert_numpy_array_equal(dummies.values, minor_dummies.values) @@ -2568,14 +2568,14 @@ def test_count(self): index = self.panel.index major_count = self.panel.count(level=0)['ItemA'] - labels = index.labels[0] + level_codes = index.codes[0] for i, idx in enumerate(index.levels[0]): - assert major_count[i] == (labels == i).sum() + assert major_count[i] == (level_codes == i).sum() minor_count = self.panel.count(level=1)['ItemA'] - labels = index.labels[1] + level_codes = index.codes[1] for i, idx in enumerate(index.levels[1]): - assert minor_count[i] == (labels == i).sum() + assert minor_count[i] == (level_codes == i).sum() def test_join(self): lp1 = self.panel.filter(['ItemA', 'ItemB']) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 
1fa77f5321038..3a39b6a32a29d 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -837,7 +837,7 @@ def _check_types(l, r, obj='Index'): def _get_ilevel_values(index, level): # accept level number only unique = index.levels[level] - labels = index.labels[level] + labels = index.codes[level] filled = take_1d(unique.values, labels, fill_value=unique._na_value) values = unique._shallow_copy(filled, name=index.names[level]) return values diff --git a/test_fast.bat b/test_fast.bat index 81f30dd310e28..d5acec683d33c 100644 --- a/test_fast.bat +++ b/test_fast.bat @@ -1,3 +1,3 @@ :: test on windows set PYTHONHASHSEED=314159265 -pytest --skip-slow --skip-network -m "not single" -n 4 -r sXX --strict pandas +pytest --skip-slow --skip-network -m "not single" -n 4 -r sXX --strict pandas -x