Skip to content

Commit

Permalink
Drop names, dtype in Index.copy and dtype, levels, codes
Browse files Browse the repository at this point in the history
…in `MultiIndex.copy` (#12898)

This PR removes the `dtype` and `names` parameters from `Index.copy`, and the `dtype`, `levels`, and `codes` parameters from `MultiIndex.copy`.
  • Loading branch information
galipremsagar authored Mar 8, 2023
1 parent 5af0583 commit 531f52c
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 190 deletions.
75 changes: 7 additions & 68 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def __contains__(self, item):
return item in range(self._start, self._stop, self._step)

@_cudf_nvtx_annotate
def copy(self, name=None, deep=False, dtype=None, names=None):
def copy(self, name=None, deep=False):
"""
Make a copy of this object.
Expand All @@ -311,44 +311,11 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
name : object optional (default: None), name of index
deep : Bool (default: False)
Ignored for RangeIndex
dtype : numpy dtype optional (default: None)
Target dtype for underlying range data
.. deprecated:: 23.02
The `dtype` parameter is deprecated and will be removed in
a future version of cudf. Use the `astype` method instead.
names : list-like optional (default: False)
Kept compatibility with MultiIndex. Should not be used.
.. deprecated:: 23.04
The parameter `names` is deprecated and will be removed in
a future version of cudf. Use the `name` parameter instead.
Returns
-------
New RangeIndex instance with same range, casted to new dtype
New RangeIndex instance with same range
"""
if dtype is not None:
warnings.warn(
"parameter dtype is deprecated and will be removed in a "
"future version. Use the astype method instead.",
FutureWarning,
)

if names is not None:
warnings.warn(
"parameter names is deprecated and will be removed in a "
"future version. Use the name parameter instead.",
FutureWarning,
)

dtype = self.dtype if dtype is None else dtype

if not np.issubdtype(dtype, np.signedinteger):
raise ValueError(f"Expected Signed Integer Type, Got {dtype}")

name = self.name if name is None else name

Expand Down Expand Up @@ -1140,7 +1107,7 @@ def equals(self, other, **kwargs):
return False

@_cudf_nvtx_annotate
def copy(self, name=None, deep=False, dtype=None, names=None):
def copy(self, name=None, deep=False):
"""
Make a copy of this object.
Expand All @@ -1151,45 +1118,17 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
deep : bool, default False
Make a deep copy of the data.
With ``deep=False`` the original data is used
dtype : numpy dtype, default None
Target datatype to cast into, use original dtype when None
.. deprecated:: 23.02
The `dtype` parameter is deprecated and will be removed in
a future version of cudf. Use the `astype` method instead.
names : list-like, default False
Kept compatibility with MultiIndex. Should not be used.
.. deprecated:: 23.04
The parameter `names` is deprecated and will be removed in
a future version of cudf. Use the `name` parameter instead.
Returns
-------
New index instance, casted to new dtype
New index instance.
"""
if dtype is not None:
warnings.warn(
"parameter dtype is deprecated and will be removed in a "
"future version. Use the astype method instead.",
FutureWarning,
)

if names is not None:
warnings.warn(
"parameter names is deprecated and will be removed in a "
"future version. Use the name parameter instead.",
FutureWarning,
)

dtype = self.dtype if dtype is None else dtype
name = self.name if name is None else name

col = self._values.astype(dtype)
return _index_from_data({name: col.copy(True) if deep else col})
return _index_from_data(
{name: self._values.copy(True) if deep else self._values}
)

@_cudf_nvtx_annotate
@doc_apply(_index_astype_docstring)
Expand Down
72 changes: 2 additions & 70 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import itertools
import numbers
import pickle
import warnings
from collections import abc
from functools import cached_property
from numbers import Integral
Expand Down Expand Up @@ -318,9 +317,6 @@ def name(self, value):
def copy(
self,
names=None,
dtype=None,
levels=None,
codes=None,
deep=False,
name=None,
):
Expand All @@ -334,36 +330,12 @@ def copy(
----------
names : sequence of objects, optional (default None)
Names for each of the index levels.
dtype : object, optional (default None)
MultiIndex dtype, only supports None or object type
.. deprecated:: 23.02
The `dtype` parameter is deprecated and will be removed in
a future version of cudf. Use the `astype` method instead.
levels : sequence of arrays, optional (default None)
The unique labels for each level. Original values used if None.
.. deprecated:: 23.02
The `levels` parameter is deprecated and will be removed in
a future version of cudf.
codes : sequence of arrays, optional (default None)
Integers for each level designating which label at each location.
Original values used if None.
.. deprecated:: 23.02
The `codes` parameter is deprecated and will be removed in
a future version of cudf.
deep : Bool (default False)
If True, `._data`, `._levels`, `._codes` will be copied.
name : object, optional (default None)
To keep consistent with `Index.copy`, should not be used.
Kept for compatibility with 1-dimensional Index. Should not
be used.
Returns
-------
Expand Down Expand Up @@ -401,46 +373,6 @@ def copy(
"""

# TODO: Update message when set_levels is implemented.
# https://github.com/rapidsai/cudf/issues/12307
if levels is not None:
warnings.warn(
"parameter levels is deprecated and will be removed in a "
"future version.",
FutureWarning,
)

# TODO: Update message when set_codes is implemented.
# https://github.com/rapidsai/cudf/issues/12308
if codes is not None:
warnings.warn(
"parameter codes is deprecated and will be removed in a "
"future version.",
FutureWarning,
)

if dtype is not None:
warnings.warn(
"parameter dtype is deprecated and will be removed in a "
"future version. Use the astype method instead.",
FutureWarning,
)

dtype = object if dtype is None else dtype
if not pd.core.dtypes.common.is_object_dtype(dtype):
raise TypeError("Dtype for MultiIndex only supports object type.")

# ._data needs to be rebuilt
if levels is not None or codes is not None:
if self._levels is None or self._codes is None:
self._compute_levels_and_codes()
levels = self._levels if levels is None else levels
codes = self._codes if codes is None else codes
names = self.names if names is None else names

mi = MultiIndex(levels=levels, codes=codes, names=names, copy=deep)
return mi

mi = MultiIndex._from_data(self._data.copy(deep=deep))
if self._levels is not None:
mi._levels = [s.copy(deep) for s in self._levels]
Expand Down
58 changes: 18 additions & 40 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
NUMERIC_TYPES,
OTHER_TYPES,
SIGNED_INTEGER_TYPES,
SIGNED_TYPES,
UNSIGNED_TYPES,
_create_pandas_series,
assert_column_memory_eq,
Expand Down Expand Up @@ -307,90 +306,69 @@ def test_set_index_as_property():


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize("dtype", SIGNED_INTEGER_TYPES)
def test_index_copy_range(name, dtype, deep=True):
def test_index_copy_range(name, deep=True):
cidx = cudf.RangeIndex(1, 5)
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_eq(pidx_copy, cidx_copy)


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize("dtype,", ["datetime64[ns]", "int64"])
def test_index_copy_datetime(name, dtype, deep=True):
def test_index_copy_datetime(name, deep=True):
cidx = cudf.DatetimeIndex(["2001", "2002", "2003"])
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_eq(pidx_copy, cidx_copy)


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize("dtype", ["category", "object"])
def test_index_copy_string(name, dtype, deep=True):
def test_index_copy_string(name, deep=True):
cidx = cudf.StringIndex(["a", "b", "c"])
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_eq(pidx_copy, cidx_copy)


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize(
"dtype",
NUMERIC_TYPES + ["datetime64[ns]", "timedelta64[ns]"] + OTHER_TYPES,
)
def test_index_copy_integer(name, dtype, deep=True):
def test_index_copy_integer(name, deep=True):
"""Test for NumericIndex Copy Casts"""
cidx = cudf.Index([1, 2, 3])
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_eq(pidx_copy, cidx_copy)


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize("dtype", SIGNED_TYPES)
def test_index_copy_float(name, dtype, deep=True):
def test_index_copy_float(name, deep=True):
"""Test for NumericIndex Copy Casts"""
cidx = cudf.Index([1.0, 2.0, 3.0])
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_eq(pidx_copy, cidx_copy)


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["category"])
def test_index_copy_category(name, dtype, deep=True):
def test_index_copy_category(name, deep=True):
cidx = cudf.core.index.CategoricalIndex([1, 2, 3])
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_column_memory_ne(cidx._values, cidx_copy._values)
assert_eq(pidx_copy, cidx_copy)
Expand Down
15 changes: 3 additions & 12 deletions python/cudf/cudf/tests/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,15 +700,8 @@ def test_multiindex_equals():
}
],
)
@pytest.mark.parametrize(
"levels",
[[["2000-01-01", "2000-01-02", "2000-01-03"], ["A", "B", "C"]], None],
)
@pytest.mark.parametrize(
"codes", [[[0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]], None]
)
@pytest.mark.parametrize("names", [["X", "Y"]])
def test_multiindex_copy_sem(data, levels, codes, names):
def test_multiindex_copy_sem(data, names):
"""Test semantic equality for MultiIndex.copy"""
gdf = cudf.DataFrame(data)
pdf = gdf.to_pandas()
Expand All @@ -717,12 +710,10 @@ def test_multiindex_copy_sem(data, levels, codes, names):
pdf = pdf.groupby(["Date", "Symbol"], sort=True).mean()

gmi = gdf.index
with expect_warning_if(levels is not None or codes is not None):
gmi_copy = gmi.copy(levels=levels, codes=codes, names=names)
gmi_copy = gmi.copy(names=names)

pmi = pdf.index
with expect_warning_if(levels is not None or codes is not None):
pmi_copy = pmi.copy(levels=levels, codes=codes, names=names)
pmi_copy = pmi.copy(names=names)

for glv, plv in zip(gmi_copy.levels, pmi_copy.levels):
assert all(glv.values_host == plv.values)
Expand Down

0 comments on commit 531f52c

Please sign in to comment.