Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nvtx annotations to groupby methods #12941

Merged
merged 4 commits into from
Apr 26, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ def groups(self):
zip(group_names.to_pandas(), grouped_index._split(offsets[1:-1]))
)

@_cudf_nvtx_annotate
def get_group(self, name, obj=None):
"""
Construct DataFrame from group with provided name.
Expand Down Expand Up @@ -363,6 +364,7 @@ def get_group(self, name, obj=None):

return obj.loc[self.groups[name]]

@_cudf_nvtx_annotate
def size(self):
"""
Return the size of each group.
Expand All @@ -377,6 +379,7 @@ def size(self):
.agg("size")
)

@_cudf_nvtx_annotate
def cumcount(self):
"""
Return the cumulative count of keys in each group.
Expand All @@ -392,6 +395,7 @@ def cumcount(self):
.agg("cumcount")
)

@_cudf_nvtx_annotate
def rank(
self,
method="average",
Expand Down Expand Up @@ -781,6 +785,7 @@ def tail(self, n: int = 5, *, preserve_order: bool = True):
n, take_head=False, preserve_order=preserve_order
)

@_cudf_nvtx_annotate
def nth(self, n):
"""
Return the nth row from each group.
Expand All @@ -790,6 +795,7 @@ def nth(self, n):

return result[sizes > n]

@_cudf_nvtx_annotate
def ngroup(self, ascending=True):
"""
Number each group from 0 to the number of groups - 1.
Expand Down Expand Up @@ -1086,6 +1092,7 @@ def _normalize_aggs(
]
return column_names, columns, normalized_aggs

@_cudf_nvtx_annotate
def pipe(self, func, *args, **kwargs):
"""
Apply a function `func` with arguments to this GroupBy
Expand Down Expand Up @@ -1140,6 +1147,7 @@ def pipe(self, func, *args, **kwargs):
"""
return cudf.core.common.pipe(self, func, *args, **kwargs)

@_cudf_nvtx_annotate
def _jit_groupby_apply(
self, function, group_names, offsets, group_keys, grouped_values, *args
):
Expand All @@ -1161,6 +1169,7 @@ def _jit_groupby_apply(
result[None] = result.pop(0)
return result

@_cudf_nvtx_annotate
def _iterative_groupby_apply(
self, function, group_names, offsets, group_keys, grouped_values, *args
):
Expand Down Expand Up @@ -1198,6 +1207,7 @@ def _iterative_groupby_apply(
result.index = cudf.MultiIndex._from_data(index_data)
return result

@_cudf_nvtx_annotate
def apply(self, function, *args, engine="cudf"):
"""Apply a python transformation function over the grouped chunk.

Expand Down Expand Up @@ -1319,6 +1329,7 @@ def mult(df):
result = result.sort_index()
return result

@_cudf_nvtx_annotate
def apply_grouped(self, function, **kwargs):
"""Apply a transformation function over the grouped chunk.

Expand Down Expand Up @@ -1457,6 +1468,7 @@ def rolling_avg(val, avg):
kwargs.update({"chunks": offsets})
return grouped_values.apply_chunks(function, **kwargs)

@_cudf_nvtx_annotate
def _broadcast(self, values):
"""
Broadcast the results of an aggregation to the group
Expand All @@ -1480,6 +1492,7 @@ def _broadcast(self, values):
values.index = self.obj.index
return values

@_cudf_nvtx_annotate
def transform(self, function):
"""Apply an aggregation, then broadcast the result to the group size.

Expand Down Expand Up @@ -1534,6 +1547,7 @@ def rolling(self, *args, **kwargs):
"""
return cudf.core.window.rolling.RollingGroupby(self, *args, **kwargs)

@_cudf_nvtx_annotate
def count(self, dropna=True):
"""Compute the number of values in each column.

Expand All @@ -1548,6 +1562,7 @@ def func(x):

return self.agg(func)

@_cudf_nvtx_annotate
def describe(self, include=None, exclude=None):
"""
Generate descriptive statistics that summarizes the central tendency,
Expand Down Expand Up @@ -1619,6 +1634,7 @@ def describe(self, include=None, exclude=None):
)
return res

@_cudf_nvtx_annotate
def corr(self, method="pearson", min_periods=1):
"""
Compute pairwise correlation of columns, excluding NA/null values.
Expand Down Expand Up @@ -1680,6 +1696,7 @@ def corr(self, method="pearson", min_periods=1):
lambda x: x.corr(method, min_periods), "Correlation"
)

@_cudf_nvtx_annotate
def cov(self, min_periods=0, ddof=1):
"""
Compute the pairwise covariance among the columns of a DataFrame,
Expand Down Expand Up @@ -1854,6 +1871,7 @@ def _cov_or_corr(self, func, method_name):

return res

@_cudf_nvtx_annotate
def var(self, ddof=1):
"""Compute the column-wise variance of the values in each group.

Expand All @@ -1869,6 +1887,7 @@ def func(x):

return self.agg(func)

@_cudf_nvtx_annotate
def std(self, ddof=1):
"""Compute the column-wise std of the values in each group.

Expand All @@ -1884,6 +1903,7 @@ def func(x):

return self.agg(func)

@_cudf_nvtx_annotate
def quantile(self, q=0.5, interpolation="linear"):
"""Compute the column-wise quantiles of the values in each group.

Expand All @@ -1901,14 +1921,17 @@ def func(x):

return self.agg(func)

@_cudf_nvtx_annotate
def collect(self):
    """Gather the values of each column into one list per group.

    Delegates to ``self.agg`` with the ``"collect"`` aggregation and
    returns whatever that call produces.
    """
    collected = self.agg("collect")
    return collected

@_cudf_nvtx_annotate
def unique(self):
    """Gather the unique values of each column into one list per group.

    Delegates to ``self.agg`` with the ``"unique"`` aggregation and
    returns whatever that call produces.
    """
    distinct = self.agg("unique")
    return distinct

@_cudf_nvtx_annotate
def diff(self, periods=1, axis=0):
"""Get the difference between the values in each group.

Expand Down Expand Up @@ -1945,6 +1968,7 @@ def _scan_fill(self, method: str, limit: int) -> DataFrameOrSeries:
result = self._mimic_pandas_order(result)
return result._copy_type_metadata(values)

@_cudf_nvtx_annotate
def pad(self, limit=None):
"""Forward fill NA values.

Expand Down Expand Up @@ -1981,6 +2005,7 @@ def ffill(self, limit=None):

return self._scan_fill("ffill", limit)

@_cudf_nvtx_annotate
def backfill(self, limit=None):
"""Backward fill NA values.

Expand Down Expand Up @@ -2015,6 +2040,7 @@ def bfill(self, limit=None):

return self._scan_fill("bfill", limit)

@_cudf_nvtx_annotate
def fillna(
self,
value=None,
Expand Down Expand Up @@ -2078,6 +2104,7 @@ def fillna(
value=value, inplace=inplace, axis=axis, limit=limit
)

@_cudf_nvtx_annotate
def shift(self, periods=1, freq=None, axis=0, fill_value=None):
"""
Shift each group by ``periods`` positions.
Expand Down Expand Up @@ -2133,6 +2160,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
result = self._mimic_pandas_order(result)
return result._copy_type_metadata(values)

@_cudf_nvtx_annotate
def pct_change(
self, periods=1, fill_method="ffill", axis=0, limit=None, freq=None
):
Expand Down