Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Drop names, dtype in Index.copy and dtype, levels, codes in MultiIndex.copy #12898

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 7 additions & 68 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def __contains__(self, item):
return item in range(self._start, self._stop, self._step)

@_cudf_nvtx_annotate
def copy(self, name=None, deep=False, dtype=None, names=None):
def copy(self, name=None, deep=False):
"""
Make a copy of this object.

Expand All @@ -311,44 +311,11 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
name : object optional (default: None), name of index
deep : Bool (default: False)
Ignored for RangeIndex
dtype : numpy dtype optional (default: None)
Target dtype for underlying range data

.. deprecated:: 23.02

The `dtype` parameter is deprecated and will be removed in
a future version of cudf. Use the `astype` method instead.

names : list-like optional (default: False)
Kept compatibility with MultiIndex. Should not be used.

.. deprecated:: 23.04

The parameter `names` is deprecated and will be removed in
a future version of cudf. Use the `name` parameter instead.

Returns
-------
New RangeIndex instance with same range, casted to new dtype
New RangeIndex instance with same range
"""
if dtype is not None:
warnings.warn(
"parameter dtype is deprecated and will be removed in a "
"future version. Use the astype method instead.",
FutureWarning,
)

if names is not None:
warnings.warn(
"parameter names is deprecated and will be removed in a "
"future version. Use the name parameter instead.",
FutureWarning,
)

dtype = self.dtype if dtype is None else dtype

if not np.issubdtype(dtype, np.signedinteger):
raise ValueError(f"Expected Signed Integer Type, Got {dtype}")

name = self.name if name is None else name

Expand Down Expand Up @@ -1140,7 +1107,7 @@ def equals(self, other, **kwargs):
return False

@_cudf_nvtx_annotate
def copy(self, name=None, deep=False, dtype=None, names=None):
def copy(self, name=None, deep=False):
"""
Make a copy of this object.

Expand All @@ -1151,45 +1118,17 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
deep : bool, default False
Make a deep copy of the data.
With ``deep=False`` the original data is used
dtype : numpy dtype, default None
Target datatype to cast into, use original dtype when None

.. deprecated:: 23.02

The `dtype` parameter is deprecated and will be removed in
a future version of cudf. Use the `astype` method instead.

names : list-like, default False
Kept compatibility with MultiIndex. Should not be used.

.. deprecated:: 23.04

The parameter `names` is deprecated and will be removed in
a future version of cudf. Use the `name` parameter instead.

Returns
-------
New index instance, casted to new dtype
New index instance.
"""
if dtype is not None:
warnings.warn(
"parameter dtype is deprecated and will be removed in a "
"future version. Use the astype method instead.",
FutureWarning,
)

if names is not None:
warnings.warn(
"parameter names is deprecated and will be removed in a "
"future version. Use the name parameter instead.",
FutureWarning,
)

dtype = self.dtype if dtype is None else dtype
name = self.name if name is None else name

col = self._values.astype(dtype)
return _index_from_data({name: col.copy(True) if deep else col})
return _index_from_data(
{name: self._values.copy(True) if deep else self._values}
)

@_cudf_nvtx_annotate
@doc_apply(_index_astype_docstring)
Expand Down
72 changes: 2 additions & 70 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import itertools
import numbers
import pickle
import warnings
from collections import abc
from functools import cached_property
from numbers import Integral
Expand Down Expand Up @@ -318,9 +317,6 @@ def name(self, value):
def copy(
self,
names=None,
dtype=None,
levels=None,
codes=None,
deep=False,
name=None,
):
Expand All @@ -334,36 +330,12 @@ def copy(
----------
names : sequence of objects, optional (default None)
Names for each of the index levels.
dtype : object, optional (default None)
MultiIndex dtype, only supports None or object type

.. deprecated:: 23.02

The `dtype` parameter is deprecated and will be removed in
a future version of cudf. Use the `astype` method instead.

levels : sequence of arrays, optional (default None)
The unique labels for each level. Original values used if None.

.. deprecated:: 23.02

The `levels` parameter is deprecated and will be removed in
a future version of cudf.

codes : sequence of arrays, optional (default None)
Integers for each level designating which label at each location.
Original values used if None.

.. deprecated:: 23.02

The `codes` parameter is deprecated and will be removed in
a future version of cudf.

deep : Bool (default False)
If True, `._data`, `._levels`, `._codes` will be copied.
name : object, optional (default None)
To keep consistent with `Index.copy`, should not be used.
Kept for compatibility with 1-dimensional Index. Should not
be used.

Returns
-------
Expand Down Expand Up @@ -401,46 +373,6 @@ def copy(

"""

# TODO: Update message when set_levels is implemented.
# https://github.com/rapidsai/cudf/issues/12307
if levels is not None:
warnings.warn(
"parameter levels is deprecated and will be removed in a "
"future version.",
FutureWarning,
)

# TODO: Update message when set_codes is implemented.
# https://github.com/rapidsai/cudf/issues/12308
if codes is not None:
warnings.warn(
"parameter codes is deprecated and will be removed in a "
"future version.",
FutureWarning,
)

if dtype is not None:
warnings.warn(
"parameter dtype is deprecated and will be removed in a "
"future version. Use the astype method instead.",
FutureWarning,
)

dtype = object if dtype is None else dtype
if not pd.core.dtypes.common.is_object_dtype(dtype):
raise TypeError("Dtype for MultiIndex only supports object type.")

# ._data needs to be rebuilt
if levels is not None or codes is not None:
if self._levels is None or self._codes is None:
self._compute_levels_and_codes()
levels = self._levels if levels is None else levels
codes = self._codes if codes is None else codes
names = self.names if names is None else names

mi = MultiIndex(levels=levels, codes=codes, names=names, copy=deep)
return mi

mi = MultiIndex._from_data(self._data.copy(deep=deep))
if self._levels is not None:
mi._levels = [s.copy(deep) for s in self._levels]
Expand Down
58 changes: 18 additions & 40 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
NUMERIC_TYPES,
OTHER_TYPES,
SIGNED_INTEGER_TYPES,
SIGNED_TYPES,
UNSIGNED_TYPES,
_create_pandas_series,
assert_column_memory_eq,
Expand Down Expand Up @@ -307,90 +306,69 @@ def test_set_index_as_property():


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize("dtype", SIGNED_INTEGER_TYPES)
def test_index_copy_range(name, dtype, deep=True):
def test_index_copy_range(name, deep=True):
cidx = cudf.RangeIndex(1, 5)
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_eq(pidx_copy, cidx_copy)


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize("dtype,", ["datetime64[ns]", "int64"])
def test_index_copy_datetime(name, dtype, deep=True):
def test_index_copy_datetime(name, deep=True):
cidx = cudf.DatetimeIndex(["2001", "2002", "2003"])
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_eq(pidx_copy, cidx_copy)


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize("dtype", ["category", "object"])
def test_index_copy_string(name, dtype, deep=True):
def test_index_copy_string(name, deep=True):
cidx = cudf.StringIndex(["a", "b", "c"])
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_eq(pidx_copy, cidx_copy)


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize(
"dtype",
NUMERIC_TYPES + ["datetime64[ns]", "timedelta64[ns]"] + OTHER_TYPES,
)
def test_index_copy_integer(name, dtype, deep=True):
def test_index_copy_integer(name, deep=True):
"""Test for NumericIndex Copy Casts"""
cidx = cudf.Index([1, 2, 3])
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_eq(pidx_copy, cidx_copy)


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize("dtype", SIGNED_TYPES)
def test_index_copy_float(name, dtype, deep=True):
def test_index_copy_float(name, deep=True):
"""Test for NumericIndex Copy Casts"""
cidx = cudf.Index([1.0, 2.0, 3.0])
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_eq(pidx_copy, cidx_copy)


@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["category"])
def test_index_copy_category(name, dtype, deep=True):
def test_index_copy_category(name, deep=True):
cidx = cudf.core.index.CategoricalIndex([1, 2, 3])
pidx = cidx.to_pandas()

with pytest.warns(FutureWarning):
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
with pytest.warns(FutureWarning):
cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
pidx_copy = pidx.copy(name=name, deep=deep)
cidx_copy = cidx.copy(name=name, deep=deep)

assert_column_memory_ne(cidx._values, cidx_copy._values)
assert_eq(pidx_copy, cidx_copy)
Expand Down
15 changes: 3 additions & 12 deletions python/cudf/cudf/tests/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,15 +700,8 @@ def test_multiindex_equals():
}
],
)
@pytest.mark.parametrize(
"levels",
[[["2000-01-01", "2000-01-02", "2000-01-03"], ["A", "B", "C"]], None],
)
@pytest.mark.parametrize(
"codes", [[[0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]], None]
)
@pytest.mark.parametrize("names", [["X", "Y"]])
def test_multiindex_copy_sem(data, levels, codes, names):
def test_multiindex_copy_sem(data, names):
"""Test semantic equality for MultiIndex.copy"""
gdf = cudf.DataFrame(data)
pdf = gdf.to_pandas()
Expand All @@ -717,12 +710,10 @@ def test_multiindex_copy_sem(data, levels, codes, names):
pdf = pdf.groupby(["Date", "Symbol"], sort=True).mean()

gmi = gdf.index
with expect_warning_if(levels is not None or codes is not None):
gmi_copy = gmi.copy(levels=levels, codes=codes, names=names)
gmi_copy = gmi.copy(names=names)

pmi = pdf.index
with expect_warning_if(levels is not None or codes is not None):
pmi_copy = pmi.copy(levels=levels, codes=codes, names=names)
pmi_copy = pmi.copy(names=names)

for glv, plv in zip(gmi_copy.levels, pmi_copy.levels):
assert all(glv.values_host == plv.values)
Expand Down