From 314a57beb7ff0c04c7111802112b57ea0664c42d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 16 May 2023 10:56:59 -0700 Subject: [PATCH 1/3] Deprecated StringIndex --- python/cudf/cudf/core/dataframe.py | 4 ++-- python/cudf/cudf/core/index.py | 6 ++++++ python/cudf/cudf/tests/test_dataframe.py | 2 +- python/cudf/cudf/tests/test_groupby.py | 2 +- python/cudf/cudf/tests/test_index.py | 8 +++++--- python/cudf/cudf/tests/test_monotonic.py | 5 ++--- python/cudf/cudf/tests/test_multiindex.py | 4 ++-- python/cudf/cudf/tests/test_string.py | 5 +++-- python/cudf/cudf/tests/test_text.py | 2 +- 9 files changed, 23 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index afd2a59037f..eb6685861d4 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -5853,7 +5853,7 @@ def _reduce( ) source = self._get_columns_by_label(numeric_cols) if source.empty: - return Series(index=cudf.StringIndex([])) + return Series(index=cudf.Index([], dtype="str")) axis = source._get_axis_from_axis_arg(axis) @@ -5895,7 +5895,7 @@ def _reduce( ) source = self._get_columns_by_label(numeric_cols) if source.empty: - return Series(index=cudf.StringIndex([])) + return Series(index=cudf.Index([], dtype="str")) try: result = [ getattr(source._data[col], op)(**kwargs) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index f5dc8298bd2..c3d946f5cdc 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -3072,6 +3072,12 @@ class StringIndex(GenericIndex): @_cudf_nvtx_annotate def __init__(self, values, copy=False, **kwargs): + warnings.warn( + f"cudf.{self.__class__.__name__} is deprecated and will be " + "removed from cudf in a future version. Use cudf.Index with the " + "appropriate dtype instead.", + FutureWarning, + ) kwargs = _setdefault_name(values, **kwargs) if isinstance(values, StringColumn): values = values.copy(deep=copy) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 918bd995ed1..6a01e798e19 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -4114,7 +4114,7 @@ def test_as_column_types(): assert_eq(pds, gds) pds = pd.Series(pd.Index(["1", "18", "9"]), dtype="int") - gds = cudf.Series(cudf.StringIndex(["1", "18", "9"]), dtype="int") + gds = cudf.Series(cudf.Index(["1", "18", "9"]), dtype="int") assert_eq(pds, gds) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index e5199146fef..7651382e2ac 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -3084,7 +3084,7 @@ def index(self, request): [2, 3, 4, 1, 0, 5, 6, 8, 7, 9, 10, 13], dtype="int32" ) elif request.param == "strindex": - return cudf.StringIndex(list(string.ascii_lowercase[:n])) + return cudf.Index(list(string.ascii_lowercase[:n])) elif request.param == "default": return None diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 0b0c5fba7fa..f750a91a695 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -337,7 +337,8 @@ def test_index_copy_datetime(name, dtype, deep=True): @pytest.mark.parametrize("name", ["x"]) @pytest.mark.parametrize("dtype", ["category", "object"]) def test_index_copy_string(name, dtype, deep=True): - cidx = cudf.StringIndex(["a", "b", "c"]) + with pytest.warns(FutureWarning): + cidx = cudf.StringIndex(["a", "b", "c"]) pidx = cidx.to_pandas() with pytest.warns(FutureWarning): @@ -401,7 +402,7 @@ def test_index_copy_category(name, dtype, deep=True): "idx", [ cudf.DatetimeIndex(["2001", "2002", "2003"]), - cudf.StringIndex(["a", "b", "c"]), + cudf.Index(["a", "b", "c"]), cudf.Index([1, 2, 3]), cudf.Index([1.0, 2.0, 3.0]), cudf.CategoricalIndex([1, 2, 3]), @@ -455,7 +456,8 @@ def test_rangeindex_slice_attr_name(): def test_from_pandas_str(): idx = ["a", "b", "c"] pidx = pd.Index(idx, name="idx") - gidx_1 = cudf.StringIndex(idx, name="idx") + with pytest.warns(FutureWarning): + gidx_1 = cudf.StringIndex(idx, name="idx") gidx_2 = cudf.from_pandas(pidx) assert_eq(gidx_1, gidx_2) diff --git a/python/cudf/cudf/tests/test_monotonic.py b/python/cudf/cudf/tests/test_monotonic.py index f4e8b80342a..887d61aa152 100644 --- a/python/cudf/cudf/tests/test_monotonic.py +++ b/python/cudf/cudf/tests/test_monotonic.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. """ Tests related to is_unique and is_monotonic attributes @@ -14,7 +14,6 @@ DatetimeIndex, GenericIndex, RangeIndex, - StringIndex, ) from cudf.testing._utils import assert_eq, expect_warning_if @@ -78,7 +77,7 @@ def test_generic_index(testlist): ) def test_string_index(testlist): - index = StringIndex(testlist) + index = cudf.Index(testlist) index_pd = pd.Index(testlist) assert index.is_unique == index_pd.is_unique diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index a0e027d4c86..edbb122b9c0 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -155,7 +155,7 @@ def test_multiindex_swaplevel(): def test_string_index(): - from cudf.core.index import StringIndex + from cudf.core.index import Index pdf = pd.DataFrame(np.random.rand(5, 5)) gdf = cudf.from_pandas(pdf) @@ -167,7 +167,7 @@ def test_string_index(): pdf.index = stringIndex gdf.index = stringIndex assert_eq(pdf, gdf) - stringIndex = StringIndex(["a", "b", "c", "d", "e"], name="name") + stringIndex = Index(["a", "b", "c", "d", "e"], name="name") pdf.index = stringIndex.to_pandas() gdf.index = stringIndex assert_eq(pdf, gdf) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 12e832ba23b..816eb6468b0 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -1075,7 +1075,8 @@ def test_string_index(): pdf.index = stringIndex gdf.index = stringIndex assert_eq(pdf, gdf) - stringIndex = StringIndex(["a", "b", "c", "d", "e"], name="name") + with pytest.warns(FutureWarning): + stringIndex = StringIndex(["a", "b", "c", "d", "e"], name="name") pdf.index = stringIndex.to_pandas() gdf.index = stringIndex assert_eq(pdf, gdf) @@ -2755,7 +2756,7 @@ def test_string_str_subscriptable(data, index): assert_eq(psr.str[index], gsr.str[index]) psi = pd.Index(data) - gsi = StringIndex(data) + gsi = cudf.Index(data) assert_eq(psi.str[index], gsi.str[index]) diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py index 46ee7b58c87..899248513de 100644 --- a/python/cudf/cudf/tests/test_text.py +++ b/python/cudf/cudf/tests/test_text.py @@ -548,7 +548,7 @@ def test_character_tokenize_index(): assert_eq(expected, actual) sr = cudf.Index([""]) - expected = cudf.StringIndex([], dtype="object") + expected = cudf.Index([], dtype="object") actual = sr.str.character_tokenize() assert_eq(expected, actual) From 46156515b4e6985804b75dda584e7015a28bc783 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 16 May 2023 11:00:56 -0700 Subject: [PATCH 2/3] update docs --- python/cudf/cudf/core/index.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index c3d946f5cdc..af02fdf37c4 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -3064,6 +3064,9 @@ def _is_boolean(self): class StringIndex(GenericIndex): """String defined indices into another Column + .. deprecated:: 23.06 + `StringIndex` is deprecated, use `Index` instead. + Attributes ---------- _values: A StringColumn object or NDArray of strings From 55eca5e626adc8c342761c474dd3b2bbf82d10f3 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 17 May 2023 08:01:01 -0700 Subject: [PATCH 3/3] Use Index --- python/cudf/cudf/tests/test_index.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index f750a91a695..4ce7b00dc01 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -337,8 +337,7 @@ def test_index_copy_datetime(name, dtype, deep=True): @pytest.mark.parametrize("name", ["x"]) @pytest.mark.parametrize("dtype", ["category", "object"]) def test_index_copy_string(name, dtype, deep=True): - with pytest.warns(FutureWarning): - cidx = cudf.StringIndex(["a", "b", "c"]) + cidx = cudf.Index(["a", "b", "c"]) pidx = cidx.to_pandas() with pytest.warns(FutureWarning): @@ -456,8 +455,7 @@ def test_rangeindex_slice_attr_name(): def test_from_pandas_str(): idx = ["a", "b", "c"] pidx = pd.Index(idx, name="idx") - with pytest.warns(FutureWarning): - gidx_1 = cudf.StringIndex(idx, name="idx") + gidx_1 = cudf.Index(idx, name="idx") gidx_2 = cudf.from_pandas(pidx) assert_eq(gidx_1, gidx_2)