rapidsai · rapids-bot · May 21, 2024 · May 10, 2024 · May 10, 2024 · May 10, 2024
@@ -3,7 +3,7 @@ from functools import singledispatch
 
 from pandas.errors import DataError
 
-from cudf.api.types import is_string_dtype
+from cudf.api.types import _is_categorical_dtype, is_string_dtype
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.dtypes import (
     CategoricalDtype,
@@ -167,6 +167,46 @@ cdef class GroupBy:
             included_aggregations_i = []
             col_aggregations = []
             for agg in aggs:
+                str_agg = str(agg)
+                if (
+                    is_string_dtype(col)
+                    and agg not in _STRING_AGGS
+                    and
+                    (
+                        str_agg in {"cumsum", "cummin", "cummax"}
+                        or not (
+                        any(a in str_agg for a in {
+                            "count",
+                            "max",
+                            "min",
+                            "first",
+                            "last",
+                            "nunique",
+                            "unique",
+                            "nth"
+                        })
+                        or (agg is list)
+                        )
+                    )
+                ):
+                    raise TypeError(
+                        f"function is not supported for this dtype: {agg}"
+                    )
+                elif (
+                    _is_categorical_dtype(col)
+                    and agg not in _CATEGORICAL_AGGS
+                    and (
+                        str_agg in {"cumsum", "cummin", "cummax"}
+                        or
+                        not (
+                            any(a in str_agg for a in {"count", "max", "min", "unique"})
+                        )
+                    )
+                ):
+                    raise TypeError(
+                        f"{col.dtype} type does not support {agg} operations"
+                    )
+
                 agg_obj = make_aggregation(agg)
                 if valid_aggregations == "ALL" or agg_obj.kind in valid_aggregations:
                     included_aggregations_i.append((agg, agg_obj.kind))

@@ -504,7 +504,6 @@ def test_groupby_apply_jit_unary_reductions(
     func, dtype, dataset, groupby_jit_datasets
 ):
     dataset = groupby_jit_datasets[dataset]
-
     groupby_apply_jit_reductions_test_inner(func, dataset, dtype)
 
 
@@ -1891,9 +1890,6 @@ def test_groupby_nth(n, by):
     assert_groupby_results_equal(expect, got, check_dtype=False)
 
 
-@pytest.mark.xfail(
-    reason="https://github.com/pandas-dev/pandas/issues/43209",
-)
 def test_raise_data_error():
     pdf = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})
     gdf = cudf.from_pandas(pdf)
@@ -1904,12 +1900,13 @@ def test_raise_data_error():
     )
 
 
-def test_drop_unsupported_multi_agg():
+def test_multi_agg():
     gdf = cudf.DataFrame(
         {"a": [1, 1, 2, 2], "b": [1, 2, 3, 4], "c": ["a", "b", "c", "d"]}
     )
+    pdf = gdf.to_pandas()
     assert_groupby_results_equal(
-        gdf.groupby("a").agg(["count", "mean"]),
+        pdf.groupby("a").agg({"b": ["count", "mean"], "c": ["count"]}),
         gdf.groupby("a").agg({"b": ["count", "mean"], "c": ["count"]}),
     )
 
@@ -3852,3 +3849,39 @@ def test_group_by_reduce_numeric_only(by, data, func):
     )
     result = getattr(df.groupby(by, sort=True), func)(numeric_only=True)
     assert_eq(expected, result)
+
+
+@pytest.mark.parametrize(
+    "op", ["cummax", "cummin", "cumprod", "cumsum", "mean", "median"]
+)
+def test_group_by_raises_string_error(op):
+    df = cudf.DataFrame({"a": [1, 2, 3, 4, 5], "b": ["a", "b", "c", "d", "e"]})
+
+    with pytest.raises(TypeError):
+        df.groupby(df.a).agg(op)
+
+
+@pytest.mark.parametrize(
+    "op",
+    [
+        "cummax",
+        "cummin",
+        "cumprod",
+        "cumsum",
+        "mean",
+        "median",
+        "prod",
+        "sum",
+        list,
+    ],
+)
+def test_group_by_raises_category_error(op):
+    df = cudf.DataFrame(
+        {
+            "a": [1, 2, 3, 4, 5],
+            "b": cudf.Series(["a", "b", "c", "d", "e"], dtype="category"),
+        }
+    )
+
+    with pytest.raises(TypeError):
+        df.groupby(df.a).agg(op)