From 7d8dce23a12447b2155d9e6617029aebb9535d68 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 25 Nov 2019 11:01:47 -0800 Subject: [PATCH 1/3] DEPR: remove FrozenNDArray --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/compat/pickle_compat.py | 17 +---- pandas/core/indexes/frozen.py | 80 -------------------- pandas/core/indexes/multi.py | 21 +++-- pandas/tests/indexes/multi/test_integrity.py | 2 +- pandas/tests/indexes/test_frozen.py | 64 +--------------- 6 files changed, 19 insertions(+), 166 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7d11d90eeb670..f57a003181cab 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -426,6 +426,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed previously deprecated :func:`pandas.tseries.plotting.tsplot` (:issue:`18627`) - Removed the previously deprecated ``reduce`` and ``broadcast`` arguments from :meth:`DataFrame.apply` (:issue:`18577`) - Removed the previously deprecated ``assert_raises_regex`` function in ``pandas.util.testing`` (:issue:`29174`) + - Removed the previously deprecated ``FrozenNDArray`` class in ``pandas.core.indexes.frozen`` (:issue:`29335`) - Removed previously deprecated "nthreads" argument from :func:`read_feather`, use "use_threads" instead (:issue:`23053`) - Removed :meth:`Index.is_lexsorted_for_tuple` (:issue:`29305`) - Removed support for nexted renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`29608`) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 458c0c07c7602..aeec5e8a0400a 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -89,21 +89,8 @@ def __new__(cls) -> "DataFrame": # type: ignore _class_locations_map = { ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"), # 
15477 - # - # TODO: When FrozenNDArray is removed, add - # the following lines for compat: - # - # ('pandas.core.base', 'FrozenNDArray'): - # ('numpy', 'ndarray'), - # ('pandas.core.indexes.frozen', 'FrozenNDArray'): - # ('numpy', 'ndarray'), - # - # Afterwards, remove the current entry - # for `pandas.core.base.FrozenNDArray`. - ("pandas.core.base", "FrozenNDArray"): ( - "pandas.core.indexes.frozen", - "FrozenNDArray", - ), + ("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"), + ("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"), ("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"), # 10890 ("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"), diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 2c9521d23f71a..cd8413573e5c1 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -4,14 +4,8 @@ These are used for: - .names (FrozenList) -- .levels & .codes (FrozenNDArray) """ -import warnings - -import numpy as np - -from pandas.core.dtypes.cast import coerce_indexer_dtype from pandas.core.base import PandasObject @@ -111,77 +105,3 @@ def __repr__(self) -> str: __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled pop = append = extend = remove = sort = insert = _disabled - - -class FrozenNDArray(PandasObject, np.ndarray): - - # no __array_finalize__ for now because no metadata - def __new__(cls, data, dtype=None, copy=False): - warnings.warn( - "\nFrozenNDArray is deprecated and will be removed in a " - "future version.\nPlease use `numpy.ndarray` instead.\n", - FutureWarning, - stacklevel=2, - ) - - if copy is None: - copy = not isinstance(data, FrozenNDArray) - res = np.array(data, dtype=dtype, copy=copy).view(cls) - return res - - def _disabled(self, *args, **kwargs): - """This method will not function because object is immutable.""" - raise TypeError( - "'{cls}' does not support mutable 
operations.".format(cls=self.__class__) - ) - - __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled - put = itemset = fill = _disabled - - def _shallow_copy(self): - return self.view() - - def values(self): - """returns *copy* of underlying array""" - arr = self.view(np.ndarray).copy() - return arr - - def __repr__(self) -> str: - """ - Return a string representation for this object. - """ - prepr = pprint_thing(self, escape_chars=("\t", "\r", "\n"), quote_strings=True) - return f"{type(self).__name__}({prepr}, dtype='{self.dtype}')" - - def searchsorted(self, value, side="left", sorter=None): - """ - Find indices to insert `value` so as to maintain order. - - For full documentation, see `numpy.searchsorted` - - See Also - -------- - numpy.searchsorted : Equivalent function. - """ - - # We are much more performant if the searched - # indexer is the same type as the array. - # - # This doesn't matter for int64, but DOES - # matter for smaller int dtypes. - # - # xref: https://github.com/numpy/numpy/issues/5370 - try: - value = self.dtype.type(value) - except ValueError: - pass - - return super().searchsorted(value, side=side, sorter=sorter) - - -def _ensure_frozen(array_like, categories, copy=False): - array_like = coerce_indexer_dtype(array_like, categories) - array_like = array_like.view(FrozenNDArray) - if copy: - array_like = array_like.copy() - return array_like diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 86398613798be..56db87a143e67 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -13,6 +13,7 @@ from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg +from pandas.core.dtypes.cast import coerce_indexer_dtype from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, @@ -40,7 +41,7 @@ _index_shared_docs, ensure_index, ) -from pandas.core.indexes.frozen import FrozenList, 
_ensure_frozen +from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.sorting import ( get_group_index, @@ -822,6 +823,13 @@ def labels(self): ) return self.codes + def _coerce(self, array_like, categories, copy=False): + array_like = coerce_indexer_dtype(array_like, categories) + if copy: + array_like = array_like.copy() + array_like.flags.writeable = False + return array_like + def _set_codes( self, codes, level=None, copy=False, validate=True, verify_integrity=False ): @@ -833,7 +841,7 @@ def _set_codes( if level is None: new_codes = FrozenList( - _ensure_frozen(level_codes, lev, copy=copy)._shallow_copy() + self._coerce(level_codes, lev, copy=copy).view() for lev, level_codes in zip(self._levels, codes) ) else: @@ -841,9 +849,7 @@ def _set_codes( new_codes = list(self._codes) for lev_num, level_codes in zip(level_numbers, codes): lev = self.levels[lev_num] - new_codes[lev_num] = _ensure_frozen( - level_codes, lev, copy=copy - )._shallow_copy() + new_codes[lev_num] = self._coerce(level_codes, lev, copy=copy) new_codes = FrozenList(new_codes) if verify_integrity: @@ -1124,7 +1130,8 @@ def _format_native_types(self, na_rep="nan", **kwargs): if mask.any(): nan_index = len(level) level = np.append(level, na_rep) - level_codes = level_codes.values() + assert not level_codes.flags.writeable # i.e. 
copy is needed + level_codes = level_codes.copy() # make writeable level_codes[mask] = nan_index new_levels.append(level) new_codes.append(level_codes) @@ -2085,7 +2092,7 @@ def _assert_take_fillable( if mask.any(): masked = [] for new_label in taken: - label_values = new_label.values() + label_values = new_label label_values[mask] = na_value masked.append(np.asarray(label_values)) taken = masked diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 472a404c2a8ef..7cdb5cf31338a 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -210,7 +210,7 @@ def test_metadata_immutable(idx): # ditto for labels with pytest.raises(TypeError, match=mutable_regex): codes[0] = codes[0] - with pytest.raises(TypeError, match=mutable_regex): + with pytest.raises(ValueError, match="assignment destination is read-only"): codes[0][0] = codes[0][0] # and for names names = idx.names diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index c7b219b5ee890..9f6b0325b7b33 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -1,11 +1,7 @@ -import warnings - -import numpy as np import pytest -from pandas.core.indexes.frozen import FrozenList, FrozenNDArray +from pandas.core.indexes.frozen import FrozenList from pandas.tests.test_base import CheckImmutable, CheckStringMixin -import pandas.util.testing as tm class TestFrozenList(CheckImmutable, CheckStringMixin): @@ -55,61 +51,3 @@ def test_tricky_container_to_bytes_raises(self): msg = "^'str' object cannot be interpreted as an integer$" with pytest.raises(TypeError, match=msg): bytes(self.unicode_container) - - -class TestFrozenNDArray(CheckImmutable, CheckStringMixin): - mutable_methods = ("put", "itemset", "fill") - - def setup_method(self, _): - self.lst = [3, 5, 7, -2] - self.klass = FrozenNDArray - - with warnings.catch_warnings(record=True): - 
warnings.simplefilter("ignore", FutureWarning) - - self.container = FrozenNDArray(self.lst) - self.unicode_container = FrozenNDArray(["\u05d0", "\u05d1", "c"]) - - def test_constructor_warns(self): - # see gh-9031 - with tm.assert_produces_warning(FutureWarning): - FrozenNDArray([1, 2, 3]) - - def test_tricky_container_to_bytes(self): - bytes(self.unicode_container) - - def test_shallow_copying(self): - original = self.container.copy() - assert isinstance(self.container.view(), FrozenNDArray) - assert not isinstance(self.container.view(np.ndarray), FrozenNDArray) - assert self.container.view() is not self.container - tm.assert_numpy_array_equal(self.container, original) - - # Shallow copy should be the same too - assert isinstance(self.container._shallow_copy(), FrozenNDArray) - - # setting should not be allowed - def testit(container): - container[0] = 16 - - self.check_mutable_error(testit, self.container) - - def test_values(self): - original = self.container.view(np.ndarray).copy() - n = original[0] + 15 - - vals = self.container.values() - tm.assert_numpy_array_equal(original, vals) - - assert original is not vals - vals[0] = n - - assert isinstance(self.container, FrozenNDArray) - tm.assert_numpy_array_equal(self.container.values(), original) - assert vals[0] == n - - def test_searchsorted(self): - expected = 2 - assert self.container.searchsorted(7) == expected - - assert self.container.searchsorted(value=7) == expected From 8d5d82b149d0a2a5f8a1c90dda507fdc4253b626 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 25 Nov 2019 16:36:53 -0800 Subject: [PATCH 2/3] whitespace fix --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 87474d6338749..1acb25807c776 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -433,7 +433,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- Removed previously deprecated :func:`pandas.tseries.plotting.tsplot` (:issue:`18627`) - Removed the previously deprecated ``reduce`` and ``broadcast`` arguments from :meth:`DataFrame.apply` (:issue:`18577`) - Removed the previously deprecated ``assert_raises_regex`` function in ``pandas.util.testing`` (:issue:`29174`) - - Removed the previously deprecated ``FrozenNDArray`` class in ``pandas.core.indexes.frozen`` (:issue:`29335`) +- Removed the previously deprecated ``FrozenNDArray`` class in ``pandas.core.indexes.frozen`` (:issue:`29335`) - Removed previously deprecated "nthreads" argument from :func:`read_feather`, use "use_threads" instead (:issue:`23053`) - Removed :meth:`Index.is_lexsorted_for_tuple` (:issue:`29305`) - Removed support for nexted renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`29608`) From 7bfa03ab26da225fd4897e145df52e930d2b73f3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 25 Nov 2019 16:42:10 -0800 Subject: [PATCH 3/3] make _coerce module-level --- pandas/core/indexes/multi.py | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f44268d78f2ed..7d1d3fd7656cd 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -811,13 +811,6 @@ def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): def codes(self): return self._codes - def _coerce(self, array_like, categories, copy=False): - array_like = coerce_indexer_dtype(array_like, categories) - if copy: - array_like = array_like.copy() - array_like.flags.writeable = False - return array_like - def _set_codes( self, codes, level=None, copy=False, validate=True, verify_integrity=False ): @@ -829,7 +822,7 @@ def _set_codes( if level is None: new_codes = FrozenList( - self._coerce(level_codes, lev, 
copy=copy).view() + _coerce_indexer_frozen(level_codes, lev, copy=copy).view() for lev, level_codes in zip(self._levels, codes) ) else: @@ -837,7 +830,7 @@ def _set_codes( new_codes = list(self._codes) for lev_num, level_codes in zip(level_numbers, codes): lev = self.levels[lev_num] - new_codes[lev_num] = self._coerce(level_codes, lev, copy=copy) + new_codes[lev_num] = _coerce_indexer_frozen(level_codes, lev, copy=copy) new_codes = FrozenList(new_codes) if verify_integrity: @@ -3440,3 +3433,26 @@ def maybe_droplevels(index, key): pass return index + + +def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray: + """ + Coerce the array_like indexer to the smallest integer dtype that can encode all + of the given categories. + + Parameters + ---------- + array_like : array-like + categories : array-like + copy : bool + + Returns + ------- + np.ndarray + Non-writeable. + """ + array_like = coerce_indexer_dtype(array_like, categories) + if copy: + array_like = array_like.copy() + array_like.flags.writeable = False + return array_like