diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py index 62f8377a323..ffe89e3e779 100644 --- a/python/cudf/cudf/api/types.py +++ b/python/cudf/cudf/api/types.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. """Define common type operations.""" @@ -244,7 +244,6 @@ def _union_categoricals( is_datetime64_dtype = pd_types.is_datetime64_dtype is_datetime64_ns_dtype = pd_types.is_datetime64_ns_dtype is_datetime64tz_dtype = pd_types.is_datetime64tz_dtype -is_extension_type = pd_types.is_extension_type is_extension_array_dtype = pd_types.is_extension_array_dtype is_float_dtype = _wrap_pandas_is_dtype_api(pd_types.is_float_dtype) is_int64_dtype = pd_types.is_int64_dtype @@ -263,7 +262,7 @@ def _union_categoricals( is_named_tuple = pd_types.is_named_tuple is_iterator = pd_types.is_iterator is_bool = pd_types.is_bool -is_categorical = pd_types.is_categorical +is_categorical = pd_types.is_categorical_dtype is_complex = pd_types.is_complex is_float = pd_types.is_float is_hashable = pd_types.is_hashable diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index d43621d3d36..19f19cd2cb0 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -7211,12 +7211,18 @@ def value_counts( >>> df = cudf.DataFrame({'num_legs': [2, 4, 4, 6], ... 'num_wings': [2, 0, 0, 0]}, ... index=['falcon', 'dog', 'cat', 'ant']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + cat 4 0 + ant 6 0 >>> df.value_counts() num_legs num_wings 4 0 2 2 2 1 6 0 1 - dtype: int64 + Name: count, dtype: int64 """ if subset: diff = set(subset) - set(self._data) @@ -7238,6 +7244,7 @@ def value_counts( # Pandas always returns MultiIndex even if only one column. if not isinstance(result.index, MultiIndex): result.index = MultiIndex._from_data(result._index._data) + result.name = "proportion" if normalize else "count" return result diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 60655c5a6f9..7838e9409a2 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2946,7 +2946,7 @@ def value_counts( 3.0 3 2.0 2 1.0 1 - dtype: int32 + Name: count, dtype: int32 The order of the counts can be changed by passing ``ascending=True``: @@ -2954,7 +2954,7 @@ def value_counts( 1.0 1 2.0 2 3.0 3 - dtype: int32 + Name: count, dtype: int32 With ``normalize`` set to True, returns the relative frequency by dividing all values by the sum of values. @@ -2963,7 +2963,7 @@ def value_counts( 3.0 0.500000 2.0 0.333333 1.0 0.166667 - dtype: float32 + Name: proportion, dtype: float32 To include ``NA`` value counts, pass ``dropna=False``: @@ -2983,24 +2983,24 @@ def value_counts( 2.0 2 2 1.0 1 - dtype: int32 + Name: count, dtype: int32 >>> s = cudf.Series([3, 1, 2, 3, 4, np.nan]) >>> s.value_counts(bins=3) (2.0, 3.0] 2 (0.996, 2.0] 2 (3.0, 4.0] 1 - dtype: int32 + Name: count, dtype: int32 """ if bins is not None: series_bins = cudf.cut(self, bins, include_lowest=True) - + result_name = "proportion" if normalize else "count" if dropna and self.null_count == len(self): return Series( [], dtype=np.int32, - name=self.name, - index=cudf.Index([], dtype=self.dtype), + name=result_name, + index=cudf.Index([], dtype=self.dtype, name=self.name), ) if bins is not None: @@ -3009,7 +3009,7 @@ def value_counts( else: res = self.groupby(self, dropna=dropna).count(dropna=dropna) - res.index.name = None + res.index.name = self.name if sort: res = res.sort_values(ascending=ascending) @@ -3024,7 +3024,7 @@ def value_counts( res.index._column, res.index.categories.dtype ) res.index = int_index - + res.name = result_name return res @_cudf_nvtx_annotate