Skip to content

Commit

Permalink
Add nvtx annotations to groupby methods (#12941)
Browse files Browse the repository at this point in the history
Not sure if not annotating these was an oversight during implementation...

Authors:
  - Lawrence Mitchell (https://github.com/wence-)
  - Ray Douglass (https://github.com/raydouglass)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: #12941
  • Loading branch information
wence- authored Apr 26, 2023
1 parent bd04975 commit 62c43a8
Showing 1 changed file with 28 additions and 0 deletions.
28 changes: 28 additions & 0 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ def groups(self):
zip(group_names.to_pandas(), grouped_index._split(offsets[1:-1]))
)

@_cudf_nvtx_annotate
def get_group(self, name, obj=None):
"""
Construct DataFrame from group with provided name.
Expand Down Expand Up @@ -363,6 +364,7 @@ def get_group(self, name, obj=None):

return obj.loc[self.groups[name]]

@_cudf_nvtx_annotate
def size(self):
"""
Return the size of each group.
Expand All @@ -377,6 +379,7 @@ def size(self):
.agg("size")
)

@_cudf_nvtx_annotate
def cumcount(self):
"""
Return the cumulative count of keys in each group.
Expand All @@ -392,6 +395,7 @@ def cumcount(self):
.agg("cumcount")
)

@_cudf_nvtx_annotate
def rank(
self,
method="average",
Expand Down Expand Up @@ -781,6 +785,7 @@ def tail(self, n: int = 5, *, preserve_order: bool = True):
n, take_head=False, preserve_order=preserve_order
)

@_cudf_nvtx_annotate
def nth(self, n):
"""
Return the nth row from each group.
Expand All @@ -790,6 +795,7 @@ def nth(self, n):

return result[sizes > n]

@_cudf_nvtx_annotate
def ngroup(self, ascending=True):
"""
Number each group from 0 to the number of groups - 1.
Expand Down Expand Up @@ -1086,6 +1092,7 @@ def _normalize_aggs(
]
return column_names, columns, normalized_aggs

@_cudf_nvtx_annotate
def pipe(self, func, *args, **kwargs):
"""
Apply a function `func` with arguments to this GroupBy
Expand Down Expand Up @@ -1140,6 +1147,7 @@ def pipe(self, func, *args, **kwargs):
"""
return cudf.core.common.pipe(self, func, *args, **kwargs)

@_cudf_nvtx_annotate
def _jit_groupby_apply(
self, function, group_names, offsets, group_keys, grouped_values, *args
):
Expand All @@ -1161,6 +1169,7 @@ def _jit_groupby_apply(
result[None] = result.pop(0)
return result

@_cudf_nvtx_annotate
def _iterative_groupby_apply(
self, function, group_names, offsets, group_keys, grouped_values, *args
):
Expand Down Expand Up @@ -1198,6 +1207,7 @@ def _iterative_groupby_apply(
result.index = cudf.MultiIndex._from_data(index_data)
return result

@_cudf_nvtx_annotate
def apply(self, function, *args, engine="cudf"):
"""Apply a python transformation function over the grouped chunk.
Expand Down Expand Up @@ -1319,6 +1329,7 @@ def mult(df):
result = result.sort_index()
return result

@_cudf_nvtx_annotate
def apply_grouped(self, function, **kwargs):
"""Apply a transformation function over the grouped chunk.
Expand Down Expand Up @@ -1457,6 +1468,7 @@ def rolling_avg(val, avg):
kwargs.update({"chunks": offsets})
return grouped_values.apply_chunks(function, **kwargs)

@_cudf_nvtx_annotate
def _broadcast(self, values):
"""
Broadcast the results of an aggregation to the group
Expand All @@ -1480,6 +1492,7 @@ def _broadcast(self, values):
values.index = self.obj.index
return values

@_cudf_nvtx_annotate
def transform(self, function):
"""Apply an aggregation, then broadcast the result to the group size.
Expand Down Expand Up @@ -1534,6 +1547,7 @@ def rolling(self, *args, **kwargs):
"""
return cudf.core.window.rolling.RollingGroupby(self, *args, **kwargs)

@_cudf_nvtx_annotate
def count(self, dropna=True):
"""Compute the number of values in each column.
Expand All @@ -1548,6 +1562,7 @@ def func(x):

return self.agg(func)

@_cudf_nvtx_annotate
def describe(self, include=None, exclude=None):
"""
Generate descriptive statistics that summarizes the central tendency,
Expand Down Expand Up @@ -1619,6 +1634,7 @@ def describe(self, include=None, exclude=None):
)
return res

@_cudf_nvtx_annotate
def corr(self, method="pearson", min_periods=1):
"""
Compute pairwise correlation of columns, excluding NA/null values.
Expand Down Expand Up @@ -1680,6 +1696,7 @@ def corr(self, method="pearson", min_periods=1):
lambda x: x.corr(method, min_periods), "Correlation"
)

@_cudf_nvtx_annotate
def cov(self, min_periods=0, ddof=1):
"""
Compute the pairwise covariance among the columns of a DataFrame,
Expand Down Expand Up @@ -1854,6 +1871,7 @@ def _cov_or_corr(self, func, method_name):

return res

@_cudf_nvtx_annotate
def var(self, ddof=1):
"""Compute the column-wise variance of the values in each group.
Expand All @@ -1869,6 +1887,7 @@ def func(x):

return self.agg(func)

@_cudf_nvtx_annotate
def std(self, ddof=1):
"""Compute the column-wise std of the values in each group.
Expand All @@ -1884,6 +1903,7 @@ def func(x):

return self.agg(func)

@_cudf_nvtx_annotate
def quantile(self, q=0.5, interpolation="linear"):
"""Compute the column-wise quantiles of the values in each group.
Expand All @@ -1901,14 +1921,17 @@ def func(x):

return self.agg(func)

@_cudf_nvtx_annotate
def collect(self):
    """Return, per group, a list of all the values of each column.

    Thin wrapper that forwards to ``self.agg`` with the ``"collect"``
    aggregation name.
    """
    collected = self.agg("collect")
    return collected

@_cudf_nvtx_annotate
def unique(self):
    """Return, per group, a list of the unique values of each column.

    Thin wrapper that forwards to ``self.agg`` with the ``"unique"``
    aggregation name.
    """
    distinct = self.agg("unique")
    return distinct

@_cudf_nvtx_annotate
def diff(self, periods=1, axis=0):
"""Get the difference between the values in each group.
Expand Down Expand Up @@ -1945,6 +1968,7 @@ def _scan_fill(self, method: str, limit: int) -> DataFrameOrSeries:
result = self._mimic_pandas_order(result)
return result._copy_type_metadata(values)

@_cudf_nvtx_annotate
def pad(self, limit=None):
"""Forward fill NA values.
Expand Down Expand Up @@ -1981,6 +2005,7 @@ def ffill(self, limit=None):

return self._scan_fill("ffill", limit)

@_cudf_nvtx_annotate
def backfill(self, limit=None):
"""Backward fill NA values.
Expand Down Expand Up @@ -2015,6 +2040,7 @@ def bfill(self, limit=None):

return self._scan_fill("bfill", limit)

@_cudf_nvtx_annotate
def fillna(
self,
value=None,
Expand Down Expand Up @@ -2078,6 +2104,7 @@ def fillna(
value=value, inplace=inplace, axis=axis, limit=limit
)

@_cudf_nvtx_annotate
def shift(self, periods=1, freq=None, axis=0, fill_value=None):
"""
Shift each group by ``periods`` positions.
Expand Down Expand Up @@ -2133,6 +2160,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
result = self._mimic_pandas_order(result)
return result._copy_type_metadata(values)

@_cudf_nvtx_annotate
def pct_change(
self, periods=1, fill_method="ffill", axis=0, limit=None, freq=None
):
Expand Down

0 comments on commit 62c43a8

Please sign in to comment.