From 5cce7b616b78be7fbc0553e153400e6e922a58c1 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 2 Feb 2022 13:49:21 -0800
Subject: [PATCH 1/8] add new annotations

---
 python/cudf/cudf/core/dataframe.py | 100 +++++++++++++++++++++++-
 python/cudf/cudf/core/frame.py     | 117 ++++++++++++++++++++++++++++-
 python/cudf/cudf/io/parquet.py     |  17 +++++
 3 files changed, 228 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 323a5ad088a..3131e10d52c 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -111,6 +111,7 @@ def __setitem__(self, key, value):
             key = (key, slice(None))
         return self._setitem_tuple_arg(key, value)
 
+    @annotate("_CAN_DOWNCAST_TO_SERIES", color="green", domain="cudf_python")
     def _can_downcast_to_series(self, df, arg):
         """
         This method encapsulates the logic used
@@ -151,6 +152,7 @@ def _can_downcast_to_series(self, df, arg):
                 return True
         return False
 
+    @annotate("_DOWNCAST_TO_SERIES", color="green", domain="cudf_python")
     def _downcast_to_series(self, df, arg):
         """
         "Downcast" from a DataFrame to a Series
@@ -192,6 +194,7 @@ class _DataFrameLocIndexer(_DataFrameIndexer):
     For selection by label.
     """
 
+    @annotate("_GETITEM_SCALAR", color="green", domain="cudf_python")
     def _getitem_scalar(self, arg):
         return self._frame[arg[1]].loc[arg[0]]
 
@@ -628,6 +631,9 @@ def __init__(
         if dtype:
             self._data = self.astype(dtype)._data
 
+    @annotate(
+        "DATAFRAME_INIT_FROM_SERIES_LIST", color="blue", domain="cudf_python"
+    )
     def _init_from_series_list(self, data, columns, index):
         if index is None:
             # When `index` is `None`, the final index of
@@ -726,6 +732,9 @@ def _init_from_series_list(self, data, columns, index):
                     )
             self._data = self._data.select_by_label(columns)
 
+    @annotate(
+        "DATAFRAME_INIT_FROM_LIST_LIKE", color="blue", domain="cudf_python"
+    )
     def _init_from_list_like(self, data, index=None, columns=None):
         if index is None:
             index = RangeIndex(start=0, stop=len(data))
@@ -762,6 +771,9 @@ def _init_from_list_like(self, data, index=None, columns=None):
 
             self.columns = columns
 
+    @annotate(
+        "DATAFRAME_INIT_FROM_DICT_LIKE", color="blue", domain="cudf_python"
+    )
     def _init_from_dict_like(
         self, data, index=None, columns=None, nan_as_null=None
     ):
@@ -835,6 +847,11 @@ def _from_data(
         return out
 
     @staticmethod
+    @annotate(
+        "DATAFRAME_ALIGN_INPUT_SERIES_INDICES",
+        color="blue",
+        domain="cudf_python",
+    )
     def _align_input_series_indices(data, index):
         data = data.copy()
 
@@ -1180,6 +1197,7 @@ def __delitem__(self, name):
         """
         self._drop_column(name)
 
+    @annotate("DATAFRAME_SLICE", color="blue", domain="cudf_python")
     def _slice(self: T, arg: slice) -> T:
         """
         _slice : slice the frame as per the arg
@@ -1241,6 +1259,7 @@ def _slice(self: T, arg: slice) -> T:
                 result.columns = self.columns
                 return result
 
+    @annotate("DATAFRAME_MEMORY_USAGE", color="blue", domain="cudf_python")
     def memory_usage(self, index=True, deep=False):
         """
         Return the memory usage of each column in bytes.
@@ -1303,6 +1322,7 @@ def memory_usage(self, index=True, deep=False):
             sizes.append(self.index.memory_usage())
         return Series(sizes, index=ind)
 
+    @annotate("DATAFRAME_ARRAY_UFUNC", color="blue", domain="cudf_python")
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         if method == "__call__" and hasattr(cudf, ufunc.__name__):
             func = getattr(cudf, ufunc.__name__)
@@ -1310,6 +1330,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         else:
             return NotImplemented
 
+    @annotate("DATAFRAME_ARRAY_FUNCTION", color="blue", domain="cudf_python")
     def __array_function__(self, func, types, args, kwargs):
 
         cudf_df_module = DataFrame
@@ -1348,6 +1369,7 @@ def __array_function__(self, func, types, args, kwargs):
             return NotImplemented
 
     # The _get_numeric_data method is necessary for dask compatibility.
+    @annotate("DATAFRAME_GET_NUMERIC_DATA", color="blue", domain="cudf_python")
     def _get_numeric_data(self):
         """Return a dataframe with only numeric data types"""
         columns = [
@@ -1357,6 +1379,7 @@ def _get_numeric_data(self):
         ]
         return self[columns]
 
+    @annotate("DATAFRAME_ASSIGN", color="blue", domain="cudf_python")
     def assign(self, **kwargs):
         """
         Assign columns to DataFrame from keyword arguments.
@@ -1835,10 +1858,12 @@ def _get_renderable_dataframe(self):
 
         return output
 
+    @annotate("DATAFRAME_REPR", color="blue", domain="cudf_python")
     def __repr__(self):
         output = self._get_renderable_dataframe()
         return self._clean_renderable_dataframe(output)
 
+    @annotate("DATAFRAME_REPR_HTML", color="blue", domain="cudf_python")
     def _repr_html_(self):
         lines = (
             self._get_renderable_dataframe()
@@ -1855,9 +1880,13 @@ def _repr_html_(self):
             lines.append("</div>")
         return "\n".join(lines)
 
+    @annotate("DATAFRAME_REPR_LATEX", color="blue", domain="cudf_python")
     def _repr_latex_(self):
         return self._get_renderable_dataframe().to_pandas()._repr_latex_()
 
+    @annotate(
+        "DATAFRAME_GET_COLUMNS_BY_LABEL", color="blue", domain="cudf_python"
+    )
     def _get_columns_by_label(self, labels, downcast=False):
         """
         Return columns of dataframe by `labels`
@@ -1880,6 +1909,7 @@ def _get_columns_by_label(self, labels, downcast=False):
         )
         return out
 
+    @annotate("DATAFRAME_BINARYOP", color="blue", domain="cudf_python")
     def _binaryop(
         self,
         other: Any,
@@ -1970,6 +2000,7 @@ def _binaryop(
             index=lhs._index,
         )
 
+    @annotate("DATAFRAME_UPDATE", color="blue", domain="cudf_python")
     def update(
         self,
         other,
@@ -2063,14 +2094,17 @@ def update(
 
         self._mimic_inplace(source_df, inplace=True)
 
+    @annotate("DATAFRAME_ITER", color="blue", domain="cudf_python")
     def __iter__(self):
         return iter(self.columns)
 
+    @annotate("DATAFRAME_ITERITEMS", color="blue", domain="cudf_python")
     def iteritems(self):
         """Iterate over column names and series pairs"""
         for k in self:
             yield (k, self[k])
 
+    @annotate("DATAFRAME_EQUALS", color="blue", domain="cudf_python")
     def equals(self, other, **kwargs):
         ret = super().equals(other)
         # If all other checks matched, validate names.
@@ -2133,6 +2167,7 @@ def columns(self, columns):
             data, multiindex=is_multiindex, level_names=columns.names,
         )
 
+    @annotate("DATAFRAME_REINDEX_INTERNAL", color="blue", domain="cudf_python")
     def _reindex(
         self, columns, dtypes=None, deep=False, index=None, inplace=False
     ):
@@ -2209,6 +2244,7 @@ def _reindex(
 
         return self._mimic_inplace(result, inplace=inplace)
 
+    @annotate("DATAFRAME_REINDEX", color="blue", domain="cudf_python")
     def reindex(
         self, labels=None, axis=None, index=None, columns=None, copy=True
     ):
@@ -2287,6 +2323,7 @@ def reindex(
             inplace=False,
         )
 
+    @annotate("DATAFRAME_SET_INDEX", color="blue", domain="cudf_python")
     def set_index(
         self,
         keys,
@@ -2547,12 +2584,13 @@ def reset_index(
             inplace=inplace,
         )
 
+    @annotate("DATAFRAME_TAKE", color="blue", domain="cudf_python")
     def take(self, indices, axis=0):
         out = super().take(indices)
         out.columns = self.columns
         return out
 
-    @annotate("INSERT", color="green", domain="cudf_python")
+    @annotate("DATAFRAME_INSERT", color="green", domain="cudf_python")
     def insert(self, loc, name, value, nan_as_null=None):
         """Add a column to DataFrame at the index specified by loc.
 
@@ -2604,6 +2642,7 @@ def insert(self, loc, name, value, nan_as_null=None):
 
         self._data.insert(name, value, loc=loc)
 
+    @annotate("DATAFRAME_DROP", color="green", domain="cudf_python")
     def drop(
         self,
         labels=None,
@@ -2777,12 +2816,14 @@ def drop(
         if not inplace:
             return out
 
+    @annotate("DATAFRAME_DROP_COLUMN", color="green", domain="cudf_python")
     def _drop_column(self, name):
         """Drop a column by *name*"""
         if name not in self._data:
             raise KeyError(f"column '{name}' does not exist")
         del self._data[name]
 
+    @annotate("DATAFRAME_DROP_DUPLICATES", color="green", domain="cudf_python")
     def drop_duplicates(
         self, subset=None, keep="first", inplace=False, ignore_index=False
     ):
@@ -2860,12 +2901,14 @@ def drop_duplicates(
 
         return self._mimic_inplace(outdf, inplace=inplace)
 
+    @annotate("DATAFRAME_POP", color="green", domain="cudf_python")
     def pop(self, item):
         """Return a column and drop it from the DataFrame."""
         popped = self[item]
         del self[item]
         return popped
 
+    @annotate("DATAFRAME_RENAME", color="green", domain="cudf_python")
     def rename(
         self,
         mapper=None,
@@ -3009,6 +3052,7 @@ def rename(
         else:
             return out.copy(deep=copy)
 
+    @annotate("DATAFRAME_ADD_PREFIX", color="green", domain="cudf_python")
     def add_prefix(self, prefix):
         out = self.copy(deep=True)
         out.columns = [
@@ -3016,6 +3060,7 @@ def add_prefix(self, prefix):
         ]
         return out
 
+    @annotate("DATAFRAME_ADD_SUFFIX", color="green", domain="cudf_python")
     def add_suffix(self, suffix):
         out = self.copy(deep=True)
         out.columns = [
@@ -3023,6 +3068,7 @@ def add_suffix(self, suffix):
         ]
         return out
 
+    @annotate("DATAFRAME_AGG", color="green", domain="cudf_python")
     def agg(self, aggs, axis=None):
         """
         Aggregate using one or more operations over the specified axis.
@@ -3154,6 +3200,7 @@ def agg(self, aggs, axis=None):
         else:
             raise ValueError("argument must be a string, list or dict")
 
+    @annotate("DATAFRAME_NLARGEST", color="green", domain="cudf_python")
     def nlargest(self, n, columns, keep="first"):
         """Get the rows of the DataFrame sorted by the n largest value of *columns*
 
@@ -3285,6 +3332,7 @@ def nsmallest(self, n, columns, keep="first"):
         """
         return self._n_largest_or_smallest(False, n, columns, keep)
 
+    @annotate("DATAFRAME_TRANSPOSE", color="green", domain="cudf_python")
     def transpose(self):
         """Transpose index and columns.
 
@@ -3315,6 +3363,7 @@ def transpose(self):
 
     T = property(transpose, doc=transpose.__doc__)
 
+    @annotate("DATAFRAME_MELT", color="green", domain="cudf_python")
     def melt(self, **kwargs):
         """Unpivots a DataFrame from wide format to long format,
         optionally leaving identifier variables set.
@@ -3344,7 +3393,7 @@ def melt(self, **kwargs):
 
         return melt(self, **kwargs)
 
-    @annotate("JOIN", color="blue", domain="cudf_python")
+    @annotate("DATAFRAME_JOIN", color="blue", domain="cudf_python")
     def merge(
         self,
         right,
@@ -3526,6 +3575,7 @@ def join(
         )
         return df
 
+    @annotate("DATAFRAME_GROUPBY", color="green", domain="cudf_python")
     @copy_docstring(DataFrameGroupBy)
     def groupby(
         self,
@@ -3575,6 +3625,7 @@ def groupby(
             )
         )
 
+    @annotate("DATAFRAME_QUERY", color="green", domain="cudf_python")
     def query(self, expr, local_dict=None):
         """
         Query with a boolean expression using Numba to compile a GPU kernel.
@@ -3665,6 +3716,7 @@ def query(self, expr, local_dict=None):
             boolmask = queryutils.query_execute(self, expr, callenv)
             return self._apply_boolean_mask(boolmask)
 
+    @annotate("DATAFRAME_APPLY", color="green", domain="cudf_python")
     def apply(
         self, func, axis=1, raw=False, result_type=None, args=(), **kwargs
     ):
@@ -3813,6 +3865,7 @@ def apply(
 
         return self._apply(func, _get_row_kernel, *args, **kwargs)
 
+    @annotate("DATAFRAME_APPLY_ROWS", color="green", domain="cudf_python")
     @applyutils.doc_apply()
     def apply_rows(
         self,
@@ -3891,6 +3944,7 @@ def apply_rows(
             cache_key=cache_key,
         )
 
+    @annotate("DATAFRAME_APPLY_CHUNKS", color="green", domain="cudf_python")
     @applyutils.doc_applychunks()
     def apply_chunks(
         self,
@@ -3958,6 +4012,9 @@ def apply_chunks(
             tpb=tpb,
         )
 
+    @annotate(
+        "DATAFRAME_PARTITION_BY_HASH", color="green", domain="cudf_python"
+    )
     def partition_by_hash(self, columns, nparts, keep_index=True):
         """Partition the dataframe by the hashed value of data in *columns*.
 
@@ -4295,6 +4352,7 @@ def _sizeof_fmt(num, size_qualifier):
 
         cudf.utils.ioutils.buffer_write_lines(buf, lines)
 
+    @annotate("DATAFRAME_DESCRIBE", color="green", domain="cudf_python")
     @docutils.doc_describe()
     def describe(
         self,
@@ -4354,6 +4412,7 @@ def describe(
                 sort=False,
             )
 
+    @annotate("DATAFRAME_TO_PANDAS", color="green", domain="cudf_python")
     def to_pandas(self, nullable=False, **kwargs):
         """
         Convert to a Pandas DataFrame.
@@ -4439,6 +4498,7 @@ def to_pandas(self, nullable=False, **kwargs):
         out_df.columns = out_columns
         return out_df
 
+    @annotate("DATAFRAME_FROM_PANDAS", color="green", domain="cudf_python")
     @classmethod
     def from_pandas(cls, dataframe, nan_as_null=None):
         """
@@ -4508,6 +4568,7 @@ def from_pandas(cls, dataframe, nan_as_null=None):
 
         return result
 
+    @annotate("DATAFRAME_FROM_ARROW", color="green", domain="cudf_python")
     @classmethod
     def from_arrow(cls, table):
         """
@@ -4564,6 +4625,7 @@ def from_arrow(cls, table):
 
         return out
 
+    @annotate("DATAFRAME_TO_ARROW", color="green", domain="cudf_python")
     def to_arrow(self, preserve_index=True):
         """
         Convert to a PyArrow Table.
@@ -4638,6 +4700,7 @@ def to_arrow(self, preserve_index=True):
 
         return out.replace_schema_metadata(metadata)
 
+    @annotate("DATAFRAME_TO_RECORDS", color="green", domain="cudf_python")
     def to_records(self, index=True):
         """Convert to a numpy recarray
 
@@ -4660,6 +4723,7 @@ def to_records(self, index=True):
             ret[col] = self[col].to_numpy()
         return ret
 
+    @annotate("DATAFRAME_FROM_RECORDS", color="green", domain="cudf_python")
     @classmethod
     def from_records(cls, data, index=None, columns=None, nan_as_null=False):
         """
@@ -4722,6 +4786,9 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False):
         return df
 
     @classmethod
+    @annotate(
+        "DATAFRAME_FROM_ARRAYS_INTERNAL", color="green", domain="cudf_python"
+    )
     def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False):
         """Convert a numpy/cupy array to DataFrame.
 
@@ -4781,6 +4848,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False):
             df._index = as_index(index)
         return df
 
+    @annotate("DATAFRAME_INTERPOLATE", color="green", domain="cudf_python")
     def interpolate(
         self,
         method="linear",
@@ -4811,6 +4879,7 @@ def interpolate(
             **kwargs,
         )
 
+    @annotate("DATAFRAME_QUANTILE", color="green", domain="cudf_python")
     def quantile(
         self,
         q=0.5,
@@ -4926,6 +4995,7 @@ def quantile(
             result.index = q
             return result
 
+    @annotate("DATAFRAME_QUANTILES", color="green", domain="cudf_python")
     def quantiles(self, q=0.5, interpolation="nearest"):
         """
         Return values at the given quantile.
@@ -4965,6 +5035,7 @@ def quantiles(self, q=0.5, interpolation="nearest"):
             result.index = as_index(q)
             return result
 
+    @annotate("DATAFRAME_ISIN", color="green", domain="cudf_python")
     def isin(self, values):
         """
         Whether each element in the DataFrame is contained in values.
@@ -5102,6 +5173,9 @@ def isin(self, values):
     #
     # Stats
     #
+    @annotate(
+        "DATAFRAME_PREPARE_FOR_ROWWISE_OP", color="green", domain="cudf_python"
+    )
     def _prepare_for_rowwise_op(self, method, skipna):
         """Prepare a DataFrame for CuPy-based row-wise operations."""
 
@@ -5151,6 +5225,7 @@ def _prepare_for_rowwise_op(self, method, skipna):
             coerced = coerced.astype("int64", copy=False)
         return coerced, mask, common_dtype
 
+    @annotate("DATAFRAME_COUNT", color="green", domain="cudf_python")
     def count(self, axis=0, level=None, numeric_only=False, **kwargs):
         """
         Count ``non-NA`` cells for each column or row.
@@ -5197,6 +5272,7 @@ def count(self, axis=0, level=None, numeric_only=False, **kwargs):
         "columns": 1,
     }
 
+    @annotate("DATAFRAME_REDUCE", color="green", domain="cudf_python")
     def _reduce(
         self, op, axis=None, level=None, numeric_only=None, **kwargs,
     ):
@@ -5221,6 +5297,7 @@ def _reduce(
         elif axis == 1:
             return self._apply_cupy_method_axis_1(op, **kwargs)
 
+    @annotate("DATAFRAME_SCAN", color="green", domain="cudf_python")
     def _scan(
         self, op, axis=None, *args, **kwargs,
     ):
@@ -5231,6 +5308,7 @@ def _scan(
         elif axis == 1:
             return self._apply_cupy_method_axis_1(f"cum{op}", **kwargs)
 
+    @annotate("DATAFRAME_MODE", color="green", domain="cudf_python")
     def mode(self, axis=0, numeric_only=False, dropna=True):
         """
         Get the mode(s) of each element along the selected axis.
@@ -5330,6 +5408,7 @@ def mode(self, axis=0, numeric_only=False, dropna=True):
 
         return df
 
+    @annotate("DATAFRAME_KURTOSIS", color="green", domain="cudf_python")
     def kurtosis(
         self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
     ):
@@ -5338,6 +5417,7 @@ def kurtosis(
             axis, skipna, level, numeric_only, **kwargs
         )
 
+    @annotate("DATAFRAME_SKEW", color="green", domain="cudf_python")
     def skew(
         self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
     ):
@@ -5346,14 +5426,17 @@ def skew(
             axis, skipna, level, numeric_only, **kwargs
         )
 
+    @annotate("DATAFRAME_ALL", color="green", domain="cudf_python")
     def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
         obj = self.select_dtypes(include="bool") if bool_only else self
         return super(DataFrame, obj).all(axis, skipna, level, **kwargs)
 
+    @annotate("DATAFRAME_ANY", color="green", domain="cudf_python")
     def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
         obj = self.select_dtypes(include="bool") if bool_only else self
         return super(DataFrame, obj).any(axis, skipna, level, **kwargs)
 
+    @annotate("DATAFRAME_APPLY_CUPY", color="green", domain="cudf_python")
     def _apply_cupy_method_axis_1(self, method, *args, **kwargs):
         # This method uses cupy to perform scans and reductions along rows of a
         # DataFrame. Since cuDF is designed around columnar storage and
@@ -5454,6 +5537,7 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs):
             result_df.columns = prepared.columns
             return result_df
 
+    @annotate("DATAFRAME_COLUMNS_VIEW", color="green", domain="cudf_python")
     def _columns_view(self, columns):
         """
         Return a subset of the DataFrame's columns as a view.
@@ -5462,6 +5546,7 @@ def _columns_view(self, columns):
             {col: self._data[col] for col in columns}, index=self.index
         )
 
+    @annotate("DATAFRAME_SELECT_DTYPES", color="green", domain="cudf_python")
     def select_dtypes(self, include=None, exclude=None):
         """Return a subset of the DataFrame’s columns based on the column dtypes.
 
@@ -5650,6 +5735,7 @@ def to_orc(self, fname, compression=None, *args, **kwargs):
 
         orc.to_orc(self, fname, compression, *args, **kwargs)
 
+    @annotate("DATAFRAME_STACK", color="green", domain="cudf_python")
     def stack(self, level=-1, dropna=True):
         """Stack the prescribed level(s) from columns to index
 
@@ -5711,6 +5797,7 @@ def stack(self, level=-1, dropna=True):
         else:
             return result
 
+    @annotate("DATAFRAME_COV", color="green", domain="cudf_python")
     def cov(self, **kwargs):
         """Compute the covariance matrix of a DataFrame.
 
@@ -5728,6 +5815,7 @@ def cov(self, **kwargs):
         df.columns = self.columns
         return df
 
+    @annotate("DATAFRAME_CORR", color="green", domain="cudf_python")
     def corr(self):
         """Compute the correlation matrix of a DataFrame."""
         corr = cupy.corrcoef(self.values, rowvar=False)
@@ -5735,6 +5823,7 @@ def corr(self):
         df.columns = self.columns
         return df
 
+    @annotate("DATAFRAME_TO_STRUCT", color="green", domain="cudf_python")
     def to_struct(self, name=None):
         """
         Return a struct Series composed of the columns of the DataFrame.
@@ -5759,6 +5848,7 @@ def to_struct(self, name=None):
             name=name,
         )
 
+    @annotate("DATAFRAME_KEYS", color="green", domain="cudf_python")
     def keys(self):
         """
         Get the columns.
@@ -5806,6 +5896,7 @@ def iterrows(self):
             "if you wish to iterate over each row."
         )
 
+    @annotate("DATAFRAME_APPEND", color="green", domain="cudf_python")
     def append(
         self, other, ignore_index=False, verify_integrity=False, sort=False
     ):
@@ -5958,6 +6049,7 @@ def append(
 
         return cudf.concat(to_concat, ignore_index=ignore_index, sort=sort)
 
+    @annotate("DATAFRAME_PIVOT", color="green", domain="cudf_python")
     @copy_docstring(reshape.pivot)
     def pivot(self, index, columns, values=None):
 
@@ -5965,12 +6057,14 @@ def pivot(self, index, columns, values=None):
             self, index=index, columns=columns, values=values
         )
 
+    @annotate("DATAFRAME_UNSTACK", color="green", domain="cudf_python")
     @copy_docstring(reshape.unstack)
     def unstack(self, level=-1, fill_value=None):
         return cudf.core.reshape.unstack(
             self, level=level, fill_value=fill_value
         )
 
+    @annotate("DATAFRAME_EXPLODE", color="green", domain="cudf_python")
     def explode(self, column, ignore_index=False):
         """
         Transform each element of a list-like to a row, replicating index
@@ -6145,6 +6239,7 @@ def func(left, right, output):
     )
 
 
+@annotate("CUDF_FROM_PANDAS", color="green", domain="cudf_python")
 def from_pandas(obj, nan_as_null=None):
     """
     Convert certain Pandas objects into the cudf equivalent.
@@ -6265,6 +6360,7 @@ def from_pandas(obj, nan_as_null=None):
         )
 
 
+@annotate("CUDF_MERGE", color="green", domain="cudf_python")
 def merge(left, right, *args, **kwargs):
     return left.merge(right, *args, **kwargs)
 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 891f58657b0..6b0f978b6b1 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -121,6 +121,7 @@ def deserialize(cls, header, frames):
         columns = deserialize_columns(header["columns"], frames)
         return cls_deserialize._from_data(dict(zip(column_names, columns)))
 
+    @annotate("FRAME_FROM_DATA", color="green", domain="cudf_python")
     @classmethod
     def _from_data(
         cls,
@@ -131,6 +132,7 @@ def _from_data(
         Frame.__init__(obj, data, index)
         return obj
 
+    @annotate("FRAME_FROM_COLUMNS", color="green", domain="cudf_python")
     @classmethod
     def _from_columns(
         cls,
@@ -162,6 +164,9 @@ def _from_columns(
 
         return cls._from_data(data, index)
 
+    @annotate(
+        "FRAME_FROM_COLUMNS_LIKE_SELF", color="green", domain="cudf_python"
+    )
     def _from_columns_like_self(
         self,
         columns: List[ColumnBase],
@@ -339,6 +344,7 @@ def empty(self):
     def __len__(self):
         return self._num_rows
 
+    @annotate("FRAME_COPY", color="green", domain="cudf_python")
     def copy(self: T, deep: bool = True) -> T:
         """
         Make a copy of this object's indices and data.
@@ -424,6 +430,7 @@ def copy(self: T, deep: bool = True) -> T:
 
         return new_frame
 
+    @annotate("FRAME_EQUALS", color="green", domain="cudf_python")
     def equals(self, other, **kwargs):
         """
         Test whether two objects contain the same elements.
@@ -506,6 +513,7 @@ def equals(self, other, **kwargs):
         else:
             return self._index.equals(other._index)
 
+    @annotate("FRAME_EXPLODE", color="green", domain="cudf_python")
     def _explode(self, explode_column: Any, ignore_index: bool):
         """Helper function for `explode` in `Series` and `Dataframe`, explodes
         a specified nested column. Other columns' corresponding rows are
@@ -529,6 +537,9 @@ def _explode(self, explode_column: Any, ignore_index: bool):
             res.index.names = self._index.names
         return res
 
+    @annotate(
+        "FRAME_GET_COLUMNS_BY_LABEL", color="green", domain="cudf_python"
+    )
     def _get_columns_by_label(self, labels, downcast=False):
         """
         Returns columns of the Frame specified by `labels`
@@ -536,6 +547,9 @@ def _get_columns_by_label(self, labels, downcast=False):
         """
         return self._data.select_by_label(labels)
 
+    @annotate(
+        "FRAME_GET_COLUMNS_BY_INDEX", color="green", domain="cudf_python"
+    )
     def _get_columns_by_index(self, indices):
         """
         Returns columns of the Frame specified by `labels`
@@ -559,6 +573,7 @@ def _as_column(self):
 
         return self._data[None].copy(deep=False)
 
+    @annotate("FRAME_EMPTY_LIKE", color="green", domain="cudf_python")
     def _empty_like(self, keep_index=True):
         result = self.__class__._from_data(
             *libcudf.copying.table_empty_like(self, keep_index)
@@ -648,6 +663,7 @@ def get_column_values_na(col):
             matrix[:, i] = get_column_values_na(col)
         return matrix
 
+    @annotate("FRAME_TO_CUPY", color="green", domain="cudf_python")
     def to_cupy(
         self,
         dtype: Union[Dtype, None] = None,
@@ -682,6 +698,7 @@ def to_cupy(
             na_value,
         )
 
+    @annotate("FRAME_TO_NUMPY", color="green", domain="cudf_python")
     def to_numpy(
         self,
         dtype: Union[Dtype, None] = None,
@@ -716,6 +733,7 @@ def to_numpy(
             (lambda col: col.values_host), np.empty, dtype, na_value
         )
 
+    @annotate("FRAME_CLIP", color="green", domain="cudf_python")
     def clip(self, lower=None, upper=None, inplace=False, axis=1):
         """
         Trim values at input threshold(s).
@@ -843,6 +861,7 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1):
 
         return self._mimic_inplace(output, inplace=inplace)
 
+    @annotate("FRAME_WHERE", color="green", domain="cudf_python")
     def where(self, cond, other=None, inplace=False):
         """
         Replace values where the condition is False.
@@ -901,6 +920,7 @@ def where(self, cond, other=None, inplace=False):
             frame=self, cond=cond, other=other, inplace=inplace
         )
 
+    @annotate("FRAME_MASK", color="green", domain="cudf_python")
     def mask(self, cond, other=None, inplace=False):
         """
         Replace values where the condition is True.
@@ -962,6 +982,7 @@ def mask(self, cond, other=None, inplace=False):
 
         return self.where(cond=~cond, other=other, inplace=inplace)
 
+    @annotate("FRAME_PIPE", color="green", domain="cudf_python")
     def pipe(self, func, *args, **kwargs):
         """
         Apply ``func(self, *args, **kwargs)``.
@@ -1092,6 +1113,7 @@ def scatter_by_map(
 
         return result
 
+    @annotate("FRAME_FILLNA", color="green", domain="cudf_python")
     def fillna(
         self, value=None, method=None, axis=None, inplace=False, limit=None
     ):
@@ -1238,6 +1260,7 @@ def fillna(
 
         return self._mimic_inplace(result, inplace=inplace)
 
+    @annotate("FRAME_DROPNA_COLUMNS", color="green", domain="cudf_python")
     def _drop_na_columns(self, how="any", subset=None, thresh=None):
         """
         Drop columns containing nulls
@@ -1265,6 +1288,7 @@ def _drop_na_columns(self, how="any", subset=None, thresh=None):
 
         return self[out_cols]
 
+    @annotate("FRAME_INTERPOLATE", color="green", domain="cudf_python")
     def interpolate(
         self,
         method="linear",
@@ -1334,6 +1358,7 @@ def interpolate(
             else result._gather(perm_sort.argsort())
         )
 
+    @annotate("FRAME_QUANTILES", color="green", domain="cudf_python")
     def _quantiles(
         self,
         q,
@@ -1366,6 +1391,7 @@ def _quantiles(
         result._copy_type_metadata(self)
         return result
 
+    @annotate("FRAME_RANK", color="green", domain="cudf_python")
     def rank(
         self,
         axis=0,
@@ -1442,6 +1468,7 @@ def rank(
 
         return self._from_data(data, index).astype(np.float64)
 
+    @annotate("FRAME_REPEAT", color="green", domain="cudf_python")
     def repeat(self, repeats, axis=None):
         """Repeats elements consecutively.
 
@@ -1531,6 +1558,7 @@ def repeat(self, repeats, axis=None):
         result._copy_type_metadata(self)
         return result
 
+    @annotate("FRAME_SHIFT", color="green", domain="cudf_python")
     def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         """Shift values by `periods` positions."""
         axis = self._get_axis_from_axis_arg(axis)
@@ -1546,7 +1574,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
             zip(self._column_names, data_columns), self._index
         )
 
-    @annotate("SAMPLE", color="orange", domain="cudf_python")
+    @annotate("FRAME_SAMPLE", color="orange", domain="cudf_python")
     def sample(
         self,
         n=None,
@@ -1740,7 +1768,7 @@ def sample(
             return result
 
     @classmethod
-    @annotate("FROM_ARROW", color="orange", domain="cudf_python")
+    @annotate("FRAME_FROM_ARROW", color="orange", domain="cudf_python")
     def from_arrow(cls, data):
         """Convert from PyArrow Table to Frame
 
@@ -1880,7 +1908,7 @@ def from_arrow(cls, data):
 
         return cls._from_data({name: result[name] for name in column_names})
 
-    @annotate("TO_ARROW", color="orange", domain="cudf_python")
+    @annotate("FRAME_TO_ARROW", color="orange", domain="cudf_python")
     def to_arrow(self):
         """
         Convert to arrow Table
@@ -1912,6 +1940,7 @@ def _positions_from_column_names(self, column_names):
             if name in set(column_names)
         ]
 
+    @annotate("FRAME_REPLACE", color="green", domain="cudf_python")
     def replace(
         self,
         to_replace=None,
@@ -2198,6 +2227,7 @@ def _copy_type_metadata(
 
         return self
 
+    @annotate("FRAME_ISNULL", color="green", domain="cudf_python")
     def isnull(self):
         """
         Identify missing values.
@@ -2279,6 +2309,7 @@ def isnull(self):
     # Alias for isnull
     isna = isnull
 
+    @annotate("FRAME_NOTNULL", color="green", domain="cudf_python")
     def notnull(self):
         """
         Identify non-missing values.
@@ -2360,6 +2391,7 @@ def notnull(self):
     # Alias for notnull
     notna = notnull
 
+    @annotate("FRAME_INTERLEAVE_COLUMNS", color="green", domain="cudf_python")
     def interleave_columns(self):
         """
         Interleave Series columns of a table into a single column.
@@ -2399,6 +2431,7 @@ def interleave_columns(self):
 
         return result
 
+    @annotate("FRAME_TILE", color="green", domain="cudf_python")
     def tile(self, count):
         """
         Repeats the rows from `self` DataFrame `count` times to form a
@@ -2428,6 +2461,7 @@ def tile(self, count):
         result._copy_type_metadata(self)
         return result
 
+    @annotate("FRAME_SEARCHSORTED", color="green", domain="cudf_python")
     def searchsorted(
         self, values, side="left", ascending=True, na_position="last"
     ):
@@ -2512,7 +2546,7 @@ def searchsorted(
         else:
             return result
 
-    @annotate("ARGSORT", color="yellow", domain="cudf_python")
+    @annotate("FRAME_ARGSORT", color="yellow", domain="cudf_python")
     def argsort(
         self,
         by=None,
@@ -2615,6 +2649,7 @@ def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
 
         return libcudf.sort.order_by(to_sort, ascending, na_position)
 
+    @annotate("FRAME_SIN", color="green", domain="cudf_python")
     def sin(self):
         """
         Get Trigonometric sine, element-wise.
@@ -2676,6 +2711,7 @@ def sin(self):
         """
         return self._unaryop("sin")
 
+    @annotate("FRAME_COS", color="green", domain="cudf_python")
     def cos(self):
         """
         Get Trigonometric cosine, element-wise.
@@ -2737,6 +2773,7 @@ def cos(self):
         """
         return self._unaryop("cos")
 
+    @annotate("FRAME_TAN", color="green", domain="cudf_python")
     def tan(self):
         """
         Get Trigonometric tangent, element-wise.
@@ -2798,6 +2835,7 @@ def tan(self):
         """
         return self._unaryop("tan")
 
+    @annotate("FRAME_ASIN", color="green", domain="cudf_python")
     def asin(self):
         """
         Get Trigonometric inverse sine, element-wise.
@@ -2848,6 +2886,7 @@ def asin(self):
         """
         return self._unaryop("asin")
 
+    @annotate("FRAME_ACOS", color="green", domain="cudf_python")
     def acos(self):
         """
         Get Trigonometric inverse cosine, element-wise.
@@ -2906,6 +2945,7 @@ def acos(self):
         result = result.mask((result < 0) | (result > np.pi + 1))
         return result
 
+    @annotate("FRAME_ATAN", color="green", domain="cudf_python")
     def atan(self):
         """
         Get Trigonometric inverse tangent, element-wise.
@@ -2966,6 +3006,7 @@ def atan(self):
         """
         return self._unaryop("atan")
 
+    @annotate("FRAME_EXP", color="green", domain="cudf_python")
     def exp(self):
         """
         Get the exponential of all elements, element-wise.
@@ -3028,6 +3069,7 @@ def exp(self):
         """
         return self._unaryop("exp")
 
+    @annotate("FRAME_LOG", color="green", domain="cudf_python")
     def log(self):
         """
         Get the natural logarithm of all elements, element-wise.
@@ -3089,6 +3131,7 @@ def log(self):
         """
         return self._unaryop("log")
 
+    @annotate("FRAME_SQRT", color="green", domain="cudf_python")
     def sqrt(self):
         """
         Get the non-negative square-root of all elements, element-wise.
@@ -3144,6 +3187,7 @@ def sqrt(self):
         """
         return self._unaryop("sqrt")
 
+    @annotate("FRAME_ABS", color="green", domain="cudf_python")
     def abs(self):
         """
         Return a Series/DataFrame with absolute numeric value of each element.
@@ -3170,6 +3214,7 @@ def abs(self):
         return self._unaryop("abs")
 
     # Rounding
+    @annotate("FRAME_CEIL", color="green", domain="cudf_python")
     def ceil(self):
         """
         Rounds each value upward to the smallest integral value not less
@@ -3206,6 +3251,7 @@ def ceil(self):
 
         return self._unaryop("ceil")
 
+    @annotate("FRAME_FLOOR", color="green", domain="cudf_python")
     def floor(self):
         """Rounds each value downward to the largest integral value not greater
         than the original.
@@ -3245,6 +3291,7 @@ def floor(self):
 
         return self._unaryop("floor")
 
+    @annotate("FRAME_SCALE", color="green", domain="cudf_python")
     def scale(self):
         """
         Scale values to [0, 1] in float64
@@ -3279,6 +3326,7 @@ def scale(self):
         scaled._index = self._index.copy(deep=False)
         return scaled
 
+    @annotate("FRAME_INTERNAL_MERGE", color="green", domain="cudf_python")
     def _merge(
         self,
         right,
@@ -3322,6 +3370,7 @@ def _merge(
             suffixes=suffixes,
         ).perform_merge()
 
+    @annotate("FRAME_IS_SORTED", color="green", domain="cudf_python")
     def _is_sorted(self, ascending=None, null_position=None):
         """
         Returns a boolean indicating whether the data of the Frame are sorted
@@ -3352,12 +3401,14 @@ def _is_sorted(self, ascending=None, null_position=None):
             self, ascending=ascending, null_position=null_position
         )
 
+    @annotate("FRAME_SPLIT", color="green", domain="cudf_python")
     def _split(self, splits, keep_index=True):
         results = libcudf.copying.table_split(
             self, splits, keep_index=keep_index
         )
         return [self.__class__._from_data(*result) for result in results]
 
+    @annotate("FRAME_ENCODE", color="green", domain="cudf_python")
     def _encode(self):
         data, index, indices = libcudf.transform.table_encode(self)
         for name, col in data.items():
@@ -3365,6 +3416,7 @@ def _encode(self):
         keys = self.__class__._from_data(data, index)
         return keys, indices
 
+    @annotate("FRAME_UNARYOP", color="green", domain="cudf_python")
     def _unaryop(self, op):
         data_columns = (col.unary_operator(op) for col in self._columns)
         return self.__class__._from_data(
@@ -3404,6 +3456,7 @@ def _binaryop(
         raise NotImplementedError
 
     @classmethod
+    @annotate("FRAME_COLWISE_BINOP", color="green", domain="cudf_python")
     def _colwise_binop(
         cls,
         operands: Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]],
@@ -3562,6 +3615,7 @@ def _colwise_binop(
 
         return output
 
+    @annotate("FRAME_DOT", color="green", domain="cudf_python")
     def dot(self, other, reflect=False):
         """
         Get dot product of frame and other, (binary operator `dot`).
@@ -3722,6 +3776,7 @@ def _reduce(self, *args, **kwargs):
             f"Reductions are not supported for objects of type {type(self)}."
         )
 
+    @annotate("FRAME_MIN", color="green", domain="cudf_python")
     def min(
         self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs,
     ):
@@ -3767,6 +3822,7 @@ def min(
             **kwargs,
         )
 
+    @annotate("FRAME_MAX", color="green", domain="cudf_python")
     def max(
         self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs,
     ):
@@ -3812,6 +3868,7 @@ def max(
             **kwargs,
         )
 
+    @annotate("FRAME_SUM", color="green", domain="cudf_python")
     def sum(
         self,
         axis=None,
@@ -3870,6 +3927,7 @@ def sum(
             **kwargs,
         )
 
+    @annotate("FRAME_PRODUCT", color="green", domain="cudf_python")
     def product(
         self,
         axis=None,
@@ -3934,6 +3992,7 @@ def product(
     # Alias for pandas compatibility.
     prod = product
 
+    @annotate("FRAME_MEAN", color="green", domain="cudf_python")
     def mean(
         self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
     ):
@@ -3978,6 +4037,7 @@ def mean(
             **kwargs,
         )
 
+    @annotate("FRAME_STD", color="green", domain="cudf_python")
     def std(
         self,
         axis=None,
@@ -4034,6 +4094,7 @@ def std(
             **kwargs,
         )
 
+    @annotate("FRAME_VAR", color="green", domain="cudf_python")
     def var(
         self,
         axis=None,
@@ -4089,6 +4150,7 @@ def var(
             **kwargs,
         )
 
+    @annotate("FRAME_KURTOSIS", color="green", domain="cudf_python")
     def kurtosis(
         self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
     ):
@@ -4157,6 +4219,7 @@ def kurt(
             **kwargs,
         )
 
+    @annotate("FRAME_SKEW", color="green", domain="cudf_python")
     def skew(
         self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
     ):
@@ -4214,6 +4277,7 @@ def skew(
             **kwargs,
         )
 
+    @annotate("FRAME_ALL", color="green", domain="cudf_python")
     def all(self, axis=0, skipna=True, level=None, **kwargs):
         """
         Return whether all elements are True in DataFrame.
@@ -4249,6 +4313,7 @@ def all(self, axis=0, skipna=True, level=None, **kwargs):
             "all", axis=axis, skipna=skipna, level=level, **kwargs,
         )
 
+    @annotate("FRAME_ANY", color="green", domain="cudf_python")
     def any(self, axis=0, skipna=True, level=None, **kwargs):
         """
         Return whether any elements is True in DataFrame.
@@ -4284,6 +4349,7 @@ def any(self, axis=0, skipna=True, level=None, **kwargs):
             "any", axis=axis, skipna=skipna, level=level, **kwargs,
         )
 
+    @annotate("FRAME_SUM_OF_SQUARES", color="green", domain="cudf_python")
     def sum_of_squares(self, dtype=None):
         """Return the sum of squares of values.
 
@@ -4307,6 +4373,7 @@ def sum_of_squares(self, dtype=None):
         """
         return self._reduce("sum_of_squares", dtype=dtype)
 
+    @annotate("FRAME_MEDIAN", color="green", domain="cudf_python")
     def median(
         self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
     ):
@@ -4352,6 +4419,7 @@ def median(
         )
 
     # Scans
+    @annotate("FRAME_SCAN", color="green", domain="cudf_python")
     def _scan(self, op, axis=None, skipna=True, cast_to_int=False):
         skipna = True if skipna is None else skipna
 
@@ -4386,6 +4454,7 @@ def _scan(self, op, axis=None, skipna=True, cast_to_int=False):
         # for Index._from_data and simplify.
         return self._from_data(results, index=self._index)
 
+    @annotate("FRAME_CUMMIN", color="green", domain="cudf_python")
     def cummin(self, axis=None, skipna=True, *args, **kwargs):
         """
         Return cumulative minimum of the Series or DataFrame.
@@ -4429,6 +4498,7 @@ def cummin(self, axis=None, skipna=True, *args, **kwargs):
         """
         return self._scan("min", axis=axis, skipna=skipna, *args, **kwargs)
 
+    @annotate("FRAME_CUMMAX", color="green", domain="cudf_python")
     def cummax(self, axis=None, skipna=True, *args, **kwargs):
         """
         Return cumulative maximum of the Series or DataFrame.
@@ -4472,6 +4542,7 @@ def cummax(self, axis=None, skipna=True, *args, **kwargs):
         """
         return self._scan("max", axis=axis, skipna=skipna, *args, **kwargs)
 
+    @annotate("FRAME_CUMSUM", color="green", domain="cudf_python")
     def cumsum(self, axis=None, skipna=True, *args, **kwargs):
         """
         Return cumulative sum of the Series or DataFrame.
@@ -4518,6 +4589,7 @@ def cumsum(self, axis=None, skipna=True, *args, **kwargs):
             "sum", axis=axis, skipna=skipna, cast_to_int=True, *args, **kwargs
         )
 
+    @annotate("FRAME_CUMPROD", color="green", domain="cudf_python")
     def cumprod(self, axis=None, skipna=True, *args, **kwargs):
         """
         Return cumulative product of the Series or DataFrame.
@@ -4563,6 +4635,7 @@ def cumprod(self, axis=None, skipna=True, *args, **kwargs):
             "prod", axis=axis, skipna=skipna, cast_to_int=True, *args, **kwargs
         )
 
+    @annotate("FRAME_TO_JSON", color="green", domain="cudf_python")
     @ioutils.doc_to_json()
     def to_json(self, path_or_buf=None, *args, **kwargs):
         """{docstring}"""
@@ -4571,18 +4644,21 @@ def to_json(self, path_or_buf=None, *args, **kwargs):
             self, path_or_buf=path_or_buf, *args, **kwargs
         )
 
+    @annotate("FRAME_TO_HDF", color="green", domain="cudf_python")
     @ioutils.doc_to_hdf()
     def to_hdf(self, path_or_buf, key, *args, **kwargs):
         """{docstring}"""
 
         cudf.io.hdf.to_hdf(path_or_buf, key, self, *args, **kwargs)
 
+    @annotate("FRAME_TO_DLPACK", color="green", domain="cudf_python")
     @ioutils.doc_to_dlpack()
     def to_dlpack(self):
         """{docstring}"""
 
         return cudf.io.dlpack.to_dlpack(self)
 
+    @annotate("FRAME_TO_STRING", color="green", domain="cudf_python")
     def to_string(self):
         """
         Convert to string
@@ -4608,12 +4684,15 @@ def to_string(self):
     def __str__(self):
         return self.to_string()
 
+    @annotate("FRAME_DEEP_COPY", color="green", domain="cudf_python")
     def __deepcopy__(self, memo):
         return self.copy(deep=True)
 
+    @annotate("FRAME_COPY", color="green", domain="cudf_python")
     def __copy__(self):
         return self.copy(deep=False)
 
+    @annotate("FRAME_HEAD", color="green", domain="cudf_python")
     def head(self, n=5):
         """
         Return the first `n` rows.
@@ -4697,6 +4776,7 @@ def head(self, n=5):
         """
         return self.iloc[:n]
 
+    @annotate("FRAME_TAIL", color="green", domain="cudf_python")
     def tail(self, n=5):
         """
         Returns the last n rows as a new DataFrame or Series
@@ -4728,6 +4808,7 @@ def tail(self, n=5):
 
         return self.iloc[-n:]
 
+    @annotate("FRAME_ROLLING", color="green", domain="cudf_python")
     @copy_docstring(Rolling)
     def rolling(
         self, window, min_periods=None, center=False, axis=0, win_type=None
@@ -4741,6 +4822,7 @@ def rolling(
             win_type=win_type,
         )
 
+    @annotate("FRAME_NANS_TO_NULLS", color="green", domain="cudf_python")
     def nans_to_nulls(self):
         """
         Convert nans (if any) to nulls
@@ -4795,6 +4877,7 @@ def nans_to_nulls(self):
             self._index,
         )
 
+    @annotate("FRAME_INVERT", color="green", domain="cudf_python")
     def __invert__(self):
         """Bitwise invert (~) for integral dtypes, logical NOT for bools."""
         return self._from_data(
@@ -4805,6 +4888,7 @@ def __invert__(self):
             self._index,
         )
 
+    @annotate("FRAME_ADD", color="green", domain="cudf_python")
     def add(self, other, axis, level=None, fill_value=None):
         """
         Get Addition of dataframe or series and other, element-wise (binary
@@ -4875,6 +4959,7 @@ def add(self, other, axis, level=None, fill_value=None):
 
         return self._binaryop(other, "add", fill_value)
 
+    @annotate("FRAME_RADD", color="green", domain="cudf_python")
     def radd(self, other, axis, level=None, fill_value=None):
         """
         Get Addition of dataframe or series and other, element-wise (binary
@@ -4954,6 +5039,7 @@ def radd(self, other, axis, level=None, fill_value=None):
 
         return self._binaryop(other, "add", fill_value, reflect=True)
 
+    @annotate("FRAME_SUBTRACT", color="green", domain="cudf_python")
     def subtract(self, other, axis, level=None, fill_value=None):
         """
         Get Subtraction of dataframe or series and other, element-wise (binary
@@ -5036,6 +5122,7 @@ def subtract(self, other, axis, level=None, fill_value=None):
 
     sub = subtract
 
+    @annotate("FRAME_RSUB", color="green", domain="cudf_python")
     def rsub(self, other, axis, level=None, fill_value=None):
         """
         Get Subtraction of dataframe or series and other, element-wise (binary
@@ -5119,6 +5206,7 @@ def rsub(self, other, axis, level=None, fill_value=None):
 
         return self._binaryop(other, "sub", fill_value, reflect=True)
 
+    @annotate("FRAME_MULTIPLY", color="green", domain="cudf_python")
     def multiply(self, other, axis, level=None, fill_value=None):
         """
         Get Multiplication of dataframe or series and other, element-wise
@@ -5203,6 +5291,7 @@ def multiply(self, other, axis, level=None, fill_value=None):
 
     mul = multiply
 
+    @annotate("FRAME_RMUL", color="green", domain="cudf_python")
     def rmul(self, other, axis, level=None, fill_value=None):
         """
         Get Multiplication of dataframe or series and other, element-wise
@@ -5287,6 +5376,7 @@ def rmul(self, other, axis, level=None, fill_value=None):
 
         return self._binaryop(other, "mul", fill_value, reflect=True)
 
+    @annotate("FRAME_MOD", color="green", domain="cudf_python")
     def mod(self, other, axis, level=None, fill_value=None):
         """
         Get Modulo division of dataframe or series and other, element-wise
@@ -5357,6 +5447,7 @@ def mod(self, other, axis, level=None, fill_value=None):
 
         return self._binaryop(other, "mod", fill_value)
 
+    @annotate("FRAME_RMOD", color="green", domain="cudf_python")
     def rmod(self, other, axis, level=None, fill_value=None):
         """
         Get Modulo division of dataframe or series and other, element-wise
@@ -5439,6 +5530,7 @@ def rmod(self, other, axis, level=None, fill_value=None):
 
         return self._binaryop(other, "mod", fill_value, reflect=True)
 
+    @annotate("FRAME_POW", color="green", domain="cudf_python")
     def pow(self, other, axis, level=None, fill_value=None):
         """
         Get Exponential power of dataframe series and other, element-wise
@@ -5518,6 +5610,7 @@ def pow(self, other, axis, level=None, fill_value=None):
 
         return self._binaryop(other, "pow", fill_value)
 
+    @annotate("FRAME_RPOW", color="green", domain="cudf_python")
     def rpow(self, other, axis, level=None, fill_value=None):
         """
         Get Exponential power of dataframe or series and other, element-wise
@@ -5597,6 +5690,7 @@ def rpow(self, other, axis, level=None, fill_value=None):
 
         return self._binaryop(other, "pow", fill_value, reflect=True)
 
+    @annotate("FRAME_FLOORDIV", color="green", domain="cudf_python")
     def floordiv(self, other, axis, level=None, fill_value=None):
         """
         Get Integer division of dataframe or series and other, element-wise
@@ -5676,6 +5770,7 @@ def floordiv(self, other, axis, level=None, fill_value=None):
 
         return self._binaryop(other, "floordiv", fill_value)
 
+    @annotate("FRAME_RFLOORDIV", color="green", domain="cudf_python")
     def rfloordiv(self, other, axis, level=None, fill_value=None):
         """
         Get Integer division of dataframe or series and other, element-wise
@@ -5772,6 +5867,7 @@ def rfloordiv(self, other, axis, level=None, fill_value=None):
 
         return self._binaryop(other, "floordiv", fill_value, reflect=True)
 
+    @annotate("FRAME_TRUEDIV", color="green", domain="cudf_python")
     def truediv(self, other, axis, level=None, fill_value=None):
         """
         Get Floating division of dataframe or series and other, element-wise
@@ -5860,6 +5956,7 @@ def truediv(self, other, axis, level=None, fill_value=None):
     div = truediv
     divide = truediv
 
+    @annotate("FRAME_RTRUEDIV", color="green", domain="cudf_python")
     def rtruediv(self, other, axis, level=None, fill_value=None):
         """
         Get Floating division of dataframe or series and other, element-wise
@@ -5952,6 +6049,7 @@ def rtruediv(self, other, axis, level=None, fill_value=None):
     # Alias for rtruediv
     rdiv = rtruediv
 
+    @annotate("FRAME_EQ", color="green", domain="cudf_python")
     def eq(self, other, axis="columns", level=None, fill_value=None):
         """Equal to, element-wise (binary operator eq).
 
@@ -6027,6 +6125,7 @@ def eq(self, other, axis="columns", level=None, fill_value=None):
             other=other, fn="eq", fill_value=fill_value, can_reindex=True
         )
 
+    @annotate("FRAME_NE", color="green", domain="cudf_python")
     def ne(self, other, axis="columns", level=None, fill_value=None):
         """Not equal to, element-wise (binary operator ne).
 
@@ -6102,6 +6201,7 @@ def ne(self, other, axis="columns", level=None, fill_value=None):
             other=other, fn="ne", fill_value=fill_value, can_reindex=True
         )
 
+    @annotate("FRAME_LT", color="green", domain="cudf_python")
     def lt(self, other, axis="columns", level=None, fill_value=None):
         """Less than, element-wise (binary operator lt).
 
@@ -6177,6 +6277,7 @@ def lt(self, other, axis="columns", level=None, fill_value=None):
             other=other, fn="lt", fill_value=fill_value, can_reindex=True
         )
 
+    @annotate("FRAME_LE", color="green", domain="cudf_python")
     def le(self, other, axis="columns", level=None, fill_value=None):
         """Less than or equal, element-wise (binary operator le).
 
@@ -6252,6 +6353,7 @@ def le(self, other, axis="columns", level=None, fill_value=None):
             other=other, fn="le", fill_value=fill_value, can_reindex=True
         )
 
+    @annotate("FRAME_GT", color="green", domain="cudf_python")
     def gt(self, other, axis="columns", level=None, fill_value=None):
         """Greater than, element-wise (binary operator gt).
 
@@ -6327,6 +6429,7 @@ def gt(self, other, axis="columns", level=None, fill_value=None):
             other=other, fn="gt", fill_value=fill_value, can_reindex=True
         )
 
+    @annotate("FRAME_GE", color="green", domain="cudf_python")
     def ge(self, other, axis="columns", level=None, fill_value=None):
         """Greater than or equal, element-wise (binary operator ge).
 
@@ -6403,6 +6506,11 @@ def ge(self, other, axis="columns", level=None, fill_value=None):
         )
 
 
+@annotate(
+    "FRAME_GET_REPLACEMENT_VALUES_FOR_COLUMNS",
+    color="green",
+    domain="cudf_python",
+)
 def _get_replacement_values_for_columns(
     to_replace: Any, value: Any, columns_dtype_map: Dict[Any, Any]
 ) -> Tuple[Dict[Any, bool], Dict[Any, Any], Dict[Any, Any]]:
@@ -6567,6 +6675,7 @@ def _is_series(obj):
     return isinstance(obj, Frame) and obj.ndim == 1 and obj._index is not None
 
 
+@annotate("FRAME_DROP_ROWS_BY_LABELS", color="green", domain="cudf_python")
 def _drop_rows_by_labels(
     obj: DataFrameOrSeries,
     labels: Union[ColumnLike, abc.Iterable, str],
diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py
index a919b00692d..948428de4f0 100644
--- a/python/cudf/cudf/io/parquet.py
+++ b/python/cudf/cudf/io/parquet.py
@@ -7,6 +7,7 @@
 from uuid import uuid4
 
 import numpy as np
+from nvtx import annotate
 from pyarrow import dataset as ds, parquet as pq
 
 import cudf
@@ -16,6 +17,7 @@
 from cudf.utils import ioutils
 
 
+@annotate("_WRITE_PARQUET", color="green", domain="cudf_python")
 def _write_parquet(
     df,
     paths,
@@ -73,6 +75,7 @@ def _write_parquet(
 
 # Logic chosen to match: https://arrow.apache.org/
 # docs/_modules/pyarrow/parquet.html#write_to_dataset
+@annotate("WRITE_TO_DATASET", color="green", domain="cudf_python")
 def write_to_dataset(
     df,
     root_path,
@@ -161,6 +164,7 @@ def write_to_dataset(
 
 
 @ioutils.doc_read_parquet_metadata()
+@annotate("READ_PARQUET_METADATA", color="green", domain="cudf_python")
 def read_parquet_metadata(path):
     """{docstring}"""
 
@@ -173,6 +177,7 @@ def read_parquet_metadata(path):
     return num_rows, num_row_groups, col_names
 
 
+@annotate("_PROCESS_DATASET", color="green", domain="cudf_python")
 def _process_dataset(
     paths, fs, filters=None, row_groups=None, categorical_partitions=True,
 ):
@@ -308,6 +313,7 @@ def _process_dataset(
 
 
 @ioutils.doc_read_parquet()
+@annotate("READ_PARQUET", color="green", domain="cudf_python")
 def read_parquet(
     filepath_or_buffer,
     engine="cudf",
@@ -435,6 +441,7 @@ def read_parquet(
     )
 
 
+@annotate("_PARQUET_TO_FRAME", color="green", domain="cudf_python")
 def _parquet_to_frame(
     paths_or_buffers,
     *args,
@@ -502,6 +509,7 @@ def _parquet_to_frame(
     )
 
 
+@annotate("_WRITE_PARQUET", color="green", domain="cudf_python")
 def _read_parquet(
     filepaths_or_buffers,
     engine,
@@ -535,6 +543,7 @@ def _read_parquet(
 
 
 @ioutils.doc_to_parquet()
+@annotate("TO_PARQUET", color="green", domain="cudf_python")
 def to_parquet(
     df,
     path,
@@ -646,6 +655,7 @@ def _generate_filename():
     return uuid4().hex + ".parquet"
 
 
+@annotate("_GET_PARTITIONED", color="green", domain="cudf_python")
 def _get_partitioned(
     df,
     root_path,
@@ -689,6 +699,7 @@ def _get_partitioned(
 
 
 class ParquetDatasetWriter:
+    @annotate("ParquetDatasetWriter_INIT", color="green", domain="cudf_python")
     def __init__(
         self,
         path,
@@ -765,6 +776,9 @@ def __init__(
         self.path_cw_map: Dict[str, int] = {}
         self.filename = None
 
+    @annotate(
+        "ParquetDatasetWriter_WRITE_TABLE", color="green", domain="cudf_python"
+    )
     def write_table(self, df):
         """
         Write a dataframe to the file/dataset
@@ -821,6 +835,9 @@ def write_table(self, df):
         self.path_cw_map.update({k: new_cw_idx for k in new_paths})
         self._chunked_writers[-1][0].write_table(grouped_df, part_info)
 
+    @annotate(
+        "ParquetDatasetWriter_CLOSE", color="green", domain="cudf_python"
+    )
     def close(self, return_metadata=False):
         """
         Close all open files and optionally return footer metadata as a binary

From 814ed75bac6d873a970ac4103c810c66a9b8bdbf Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 2 Feb 2022 18:39:48 -0600
Subject: [PATCH 2/8] Update frame.py

---
 python/cudf/cudf/core/frame.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 6b0f978b6b1..2123569e482 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -121,8 +121,8 @@ def deserialize(cls, header, frames):
         columns = deserialize_columns(header["columns"], frames)
         return cls_deserialize._from_data(dict(zip(column_names, columns)))
 
-    @annotate("FRAME_FROM_DATA", color="green", domain="cudf_python")
     @classmethod
+    @annotate("FRAME_FROM_DATA", color="green", domain="cudf_python")
     def _from_data(
         cls,
         data: MutableMapping,
@@ -132,8 +132,8 @@ def _from_data(
         Frame.__init__(obj, data, index)
         return obj
 
-    @annotate("FRAME_FROM_COLUMNS", color="green", domain="cudf_python")
     @classmethod
+    @annotate("FRAME_FROM_COLUMNS", color="green", domain="cudf_python")
     def _from_columns(
         cls,
         columns: List[ColumnBase],

From 6377d68bcbdeed327806e001dfd8cc1faef600dd Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 2 Feb 2022 18:56:11 -0600
Subject: [PATCH 3/8] Update dataframe.py

---
 python/cudf/cudf/core/dataframe.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 3131e10d52c..48ef140c949 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -4498,8 +4498,8 @@ def to_pandas(self, nullable=False, **kwargs):
         out_df.columns = out_columns
         return out_df
 
-    @annotate("DATAFRAME_FROM_PANDAS", color="green", domain="cudf_python")
     @classmethod
+    @annotate("DATAFRAME_FROM_PANDAS", color="green", domain="cudf_python")
     def from_pandas(cls, dataframe, nan_as_null=None):
         """
         Convert from a Pandas DataFrame.
@@ -4568,8 +4568,8 @@ def from_pandas(cls, dataframe, nan_as_null=None):
 
         return result
 
-    @annotate("DATAFRAME_FROM_ARROW", color="green", domain="cudf_python")
     @classmethod
+    @annotate("DATAFRAME_FROM_ARROW", color="green", domain="cudf_python")
     def from_arrow(cls, table):
         """
         Convert from PyArrow Table to DataFrame.
@@ -4723,8 +4723,8 @@ def to_records(self, index=True):
             ret[col] = self[col].to_numpy()
         return ret
 
-    @annotate("DATAFRAME_FROM_RECORDS", color="green", domain="cudf_python")
     @classmethod
+    @annotate("DATAFRAME_FROM_RECORDS", color="green", domain="cudf_python")
     def from_records(cls, data, index=None, columns=None, nan_as_null=False):
         """
         Convert structured or record ndarray to DataFrame.

From 24e9d4985afa0d6ff403b54b3ed2650b0bfda724 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 2 Feb 2022 17:41:12 -0800
Subject: [PATCH 4/8] remove annotation

---
 python/cudf/cudf/core/dataframe.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 48ef140c949..8cbb9b0df5f 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3625,7 +3625,6 @@ def groupby(
             )
         )
 
-    @annotate("DATAFRAME_QUERY", color="green", domain="cudf_python")
     def query(self, expr, local_dict=None):
         """
         Query with a boolean expression using Numba to compile a GPU kernel.
@@ -3692,7 +3691,7 @@ def query(self, expr, local_dict=None):
         """
         # can't use `annotate` decorator here as we inspect the calling
         # environment.
-        with annotate("QUERY", color="purple", domain="cudf_python"):
+        with annotate("DATAFRAME_QUERY", color="purple", domain="cudf_python"):
             if local_dict is None:
                 local_dict = {}
 

From 98676756024597e8449176f62fca340757970d84 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 3 Feb 2022 07:40:56 -0800
Subject: [PATCH 5/8] annotations in dask_cudf

---
 python/dask_cudf/dask_cudf/backends.py | 23 +++++++++++++
 python/dask_cudf/dask_cudf/core.py     | 41 ++++++++++++++++++++++
 python/dask_cudf/dask_cudf/groupby.py  | 47 ++++++++++++++++++++++++++
 python/dask_cudf/dask_cudf/sorting.py  |  8 +++++
 4 files changed, 119 insertions(+)

diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index 89b5301ee83..9ad4bfb5b82 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pandas as pd
 import pyarrow as pa
+from nvtx import annotate
 
 from dask.dataframe.core import get_parallel_type, meta_nonempty
 from dask.dataframe.dispatch import (
@@ -39,6 +40,7 @@
 
 
 @meta_nonempty.register(cudf.BaseIndex)
+@annotate("_nonempty_index", color="green", domain="dask_cudf_python")
 def _nonempty_index(idx):
     if isinstance(idx, cudf.core.index.RangeIndex):
         return cudf.core.index.RangeIndex(2, name=idx.name)
@@ -73,6 +75,7 @@ def _nonempty_index(idx):
     raise TypeError(f"Don't know how to handle index of type {type(idx)}")
 
 
+@annotate("_get_non_empty_data", color="green", domain="dask_cudf_python")
 def _get_non_empty_data(s):
     if isinstance(s._column, cudf.core.column.CategoricalColumn):
         categories = (
@@ -100,6 +103,7 @@ def _get_non_empty_data(s):
 
 
 @meta_nonempty.register(cudf.Series)
+@annotate("_nonempty_series", color="green", domain="dask_cudf_python")
 def _nonempty_series(s, idx=None):
     if idx is None:
         idx = _nonempty_index(s.index)
@@ -109,6 +113,7 @@ def _nonempty_series(s, idx=None):
 
 
 @meta_nonempty.register(cudf.DataFrame)
+@annotate("meta_nonempty_cudf", color="green", domain="dask_cudf_python")
 def meta_nonempty_cudf(x):
     idx = meta_nonempty(x.index)
     columns_with_dtype = dict()
@@ -124,15 +129,18 @@ def meta_nonempty_cudf(x):
 
 
 @make_meta_dispatch.register((cudf.Series, cudf.DataFrame))
+@annotate("make_meta_cudf", color="green", domain="dask_cudf_python")
 def make_meta_cudf(x, index=None):
     return x.head(0)
 
 
 @make_meta_dispatch.register(cudf.BaseIndex)
+@annotate("make_meta_cudf_index", color="green", domain="dask_cudf_python")
 def make_meta_cudf_index(x, index=None):
     return x[:0]
 
 
+@annotate("_empty_series", color="green", domain="dask_cudf_python")
 def _empty_series(name, dtype, index=None):
     if isinstance(dtype, str) and dtype == "category":
         return cudf.Series(
@@ -142,6 +150,7 @@ def _empty_series(name, dtype, index=None):
 
 
 @make_meta_obj.register(object)
+@annotate("make_meta_object_cudf", color="green", domain="dask_cudf_python")
 def make_meta_object_cudf(x, index=None):
     """Create an empty cudf object containing the desired metadata.
 
@@ -212,6 +221,7 @@ def make_meta_object_cudf(x, index=None):
 
 
 @concat_dispatch.register((cudf.DataFrame, cudf.Series, cudf.BaseIndex))
+@annotate("concat_cudf", color="green", domain="dask_cudf_python")
 def concat_cudf(
     dfs,
     axis=0,
@@ -236,11 +246,13 @@ def concat_cudf(
 @categorical_dtype_dispatch.register(
     (cudf.DataFrame, cudf.Series, cudf.BaseIndex)
 )
+@annotate("categorical_dtype_cudf", color="green", domain="dask_cudf_python")
 def categorical_dtype_cudf(categories=None, ordered=None):
     return cudf.CategoricalDtype(categories=categories, ordered=ordered)
 
 
 @tolist_dispatch.register((cudf.Series, cudf.BaseIndex))
+@annotate("tolist_cudf", color="green", domain="dask_cudf_python")
 def tolist_cudf(obj):
     return obj.to_arrow().to_pylist()
 
@@ -248,6 +260,9 @@ def tolist_cudf(obj):
 @is_categorical_dtype_dispatch.register(
     (cudf.Series, cudf.BaseIndex, cudf.CategoricalDtype, Series)
 )
+@annotate(
+    "is_categorical_dtype_cudf", color="green", domain="dask_cudf_python"
+)
 def is_categorical_dtype_cudf(obj):
     return cudf.api.types.is_categorical_dtype(obj)
 
@@ -261,6 +276,7 @@ def is_categorical_dtype_cudf(obj):
         )
 
     @percentile_lookup.register((cudf.Series, cp.ndarray, cudf.BaseIndex))
+    @annotate("percentile_cudf", color="green", domain="dask_cudf_python")
     def percentile_cudf(a, q, interpolation="linear"):
         # Cudf dispatch to the equivalent of `np.percentile`:
         # https://numpy.org/doc/stable/reference/generated/numpy.percentile.html
@@ -305,6 +321,7 @@ def percentile_cudf(a, q, interpolation="linear"):
 
 
 @union_categoricals_dispatch.register((cudf.Series, cudf.BaseIndex))
+@annotate("union_categoricals_cudf", color="green", domain="dask_cudf_python")
 def union_categoricals_cudf(
     to_union, sort_categories=False, ignore_order=False
 ):
@@ -313,11 +330,13 @@ def union_categoricals_cudf(
     )
 
 
+@annotate("safe_hash", color="green", domain="dask_cudf_python")
 def safe_hash(frame):
     return cudf.Series(frame.hash_values(), index=frame.index)
 
 
 @hash_object_dispatch.register((cudf.DataFrame, cudf.Series))
+@annotate("hash_object_cudf", color="green", domain="dask_cudf_python")
 def hash_object_cudf(frame, index=True):
     if index:
         return safe_hash(frame.reset_index())
@@ -325,6 +344,7 @@ def hash_object_cudf(frame, index=True):
 
 
 @hash_object_dispatch.register(cudf.BaseIndex)
+@annotate("hash_object_cudf_index", color="green", domain="dask_cudf_python")
 def hash_object_cudf_index(ind, index=None):
 
     if isinstance(ind, cudf.MultiIndex):
@@ -335,6 +355,7 @@ def hash_object_cudf_index(ind, index=None):
 
 
 @group_split_dispatch.register((cudf.Series, cudf.DataFrame))
+@annotate("group_split_cudf", color="green", domain="dask_cudf_python")
 def group_split_cudf(df, c, k, ignore_index=False):
     return dict(
         zip(
@@ -349,10 +370,12 @@ def group_split_cudf(df, c, k, ignore_index=False):
 
 
 @sizeof_dispatch.register(cudf.DataFrame)
+@annotate("sizeof_cudf_dataframe", color="green", domain="dask_cudf_python")
 def sizeof_cudf_dataframe(df):
     return int(df.memory_usage().sum())
 
 
 @sizeof_dispatch.register((cudf.Series, cudf.BaseIndex))
+@annotate("sizeof_cudf_series_index", color="green", domain="dask_cudf_python")
 def sizeof_cudf_series_index(obj):
     return obj.memory_usage()
diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py
index e191873f82b..c195240501f 100644
--- a/python/dask_cudf/dask_cudf/core.py
+++ b/python/dask_cudf/dask_cudf/core.py
@@ -6,6 +6,7 @@
 
 import numpy as np
 import pandas as pd
+from nvtx import annotate
 from tlz import partition_all
 
 import dask
@@ -57,6 +58,7 @@ def __dask_postcompute__(self):
     def __dask_postpersist__(self):
         return type(self), (self._name, self._meta, self.divisions)
 
+    @annotate("_FRAME_INIT", color="green", domain="dask_cudf_python")
     def __init__(self, dsk, name, meta, divisions):
         if not isinstance(dsk, HighLevelGraph):
             dsk = HighLevelGraph.from_collections(name, dsk, dependencies=[])
@@ -82,6 +84,9 @@ def __repr__(self):
         s = "<dask_cudf.%s | %d tasks | %d npartitions>"
         return s % (type(self).__name__, len(self.dask), self.npartitions)
 
+    @annotate(
+        "_FRAME_to_dask_dataframe", color="green", domain="dask_cudf_python"
+    )
     def to_dask_dataframe(self, **kwargs):
         """Create a dask.dataframe object from a dask_cudf object"""
         nullable_pd_dtype = kwargs.get("nullable_pd_dtype", False)
@@ -99,6 +104,9 @@ def to_dask_dataframe(self, **kwargs):
 class DataFrame(_Frame, dd.core.DataFrame):
     _partition_type = cudf.DataFrame
 
+    @annotate(
+        "DATAFRAME_assign_column", color="green", domain="dask_cudf_python"
+    )
     def _assign_column(self, k, v):
         def assigner(df, k, v):
             out = df.copy()
@@ -108,6 +116,7 @@ def assigner(df, k, v):
         meta = assigner(self._meta, k, dask_make_meta(v))
         return self.map_partitions(assigner, k, v, meta=meta)
 
+    @annotate("DATAFRAME_apply_rows", color="green", domain="dask_cudf_python")
     def apply_rows(self, func, incols, outcols, kwargs=None, cache_key=None):
         import uuid
 
@@ -127,6 +136,7 @@ def do_apply_rows(df, func, incols, outcols, kwargs):
             do_apply_rows, func, incols, outcols, kwargs, meta=meta
         )
 
+    @annotate("DATAFRAME_merge", color="green", domain="dask_cudf_python")
     def merge(self, other, **kwargs):
         if kwargs.pop("shuffle", "tasks") != "tasks":
             raise ValueError(
@@ -138,6 +148,7 @@ def merge(self, other, **kwargs):
             on = list(on)
         return super().merge(other, on=on, shuffle="tasks", **kwargs)
 
+    @annotate("DATAFRAME_join", color="green", domain="dask_cudf_python")
     def join(self, other, **kwargs):
         if kwargs.pop("shuffle", "tasks") != "tasks":
             raise ValueError(
@@ -155,6 +166,7 @@ def join(self, other, **kwargs):
             on = list(on)
         return super().join(other, how=how, on=on, shuffle="tasks", **kwargs)
 
+    @annotate("DATAFRAME_set_index", color="green", domain="dask_cudf_python")
     def set_index(self, other, sorted=False, divisions=None, **kwargs):
         if kwargs.pop("shuffle", "tasks") != "tasks":
             raise ValueError(
@@ -226,6 +238,9 @@ def set_index(self, other, sorted=False, divisions=None, **kwargs):
             **kwargs,
         )
 
+    @annotate(
+        "DATAFRAME_sort_values", color="green", domain="dask_cudf_python"
+    )
     def sort_values(
         self,
         by,
@@ -261,12 +276,14 @@ def sort_values(
             return df.reset_index(drop=True)
         return df
 
+    @annotate("DATAFRAME_to_parquet", color="green", domain="dask_cudf_python")
     def to_parquet(self, path, *args, **kwargs):
         """Calls dask.dataframe.io.to_parquet with CudfEngine backend"""
         from dask_cudf.io import to_parquet
 
         return to_parquet(self, path, *args, **kwargs)
 
+    @annotate("DATAFRAME_to_orc", color="green", domain="dask_cudf_python")
     def to_orc(self, path, **kwargs):
         """Calls dask_cudf.io.to_orc"""
         from dask_cudf.io import to_orc
@@ -274,6 +291,7 @@ def to_orc(self, path, **kwargs):
         return to_orc(self, path, **kwargs)
 
     @derived_from(pd.DataFrame)
+    @annotate("DATAFRAME_var", color="green", domain="dask_cudf_python")
     def var(
         self,
         axis=None,
@@ -302,6 +320,9 @@ def var(
         else:
             return _parallel_var(self, meta, skipna, split_every, out)
 
+    @annotate(
+        "DATAFRAME_repartition", color="green", domain="dask_cudf_python"
+    )
     def repartition(self, *args, **kwargs):
         """Wraps dask.dataframe DataFrame.repartition method.
         Uses DataFrame.shuffle if `columns=` is specified.
@@ -324,6 +345,7 @@ def repartition(self, *args, **kwargs):
             )
         return super().repartition(*args, **kwargs)
 
+    @annotate("DATAFRAME_shuffle", color="green", domain="dask_cudf_python")
     def shuffle(self, *args, **kwargs):
         """Wraps dask.dataframe DataFrame.shuffle method"""
         shuffle_arg = kwargs.pop("shuffle", None)
@@ -331,18 +353,21 @@ def shuffle(self, *args, **kwargs):
             raise ValueError("dask_cudf does not support disk-based shuffle.")
         return super().shuffle(*args, shuffle="tasks", **kwargs)
 
+    @annotate("DATAFRAME_groupby", color="green", domain="dask_cudf_python")
     def groupby(self, by=None, **kwargs):
         from .groupby import CudfDataFrameGroupBy
 
         return CudfDataFrameGroupBy(self, by=by, **kwargs)
 
 
+@annotate("DATAFRAME_sum_of_squares", color="green", domain="dask_cudf_python")
 def sum_of_squares(x):
     x = x.astype("f8")._column
     outcol = libcudf.reduce.reduce("sum_of_squares", x)
     return cudf.Series(outcol)
 
 
+@annotate("DATAFRAME_var_aggregate", color="green", domain="dask_cudf_python")
 def var_aggregate(x2, x, n, ddof):
     try:
         with warnings.catch_warnings(record=True):
@@ -355,10 +380,12 @@ def var_aggregate(x2, x, n, ddof):
         return np.float64(np.nan)
 
 
+@annotate("DATAFRAME_nlargest_agg", color="green", domain="dask_cudf_python")
 def nlargest_agg(x, **kwargs):
     return cudf.concat(x).nlargest(**kwargs)
 
 
+@annotate("DATAFRAME_nsmallest_agg", color="green", domain="dask_cudf_python")
 def nsmallest_agg(x, **kwargs):
     return cudf.concat(x).nsmallest(**kwargs)
 
@@ -366,6 +393,7 @@ def nsmallest_agg(x, **kwargs):
 class Series(_Frame, dd.core.Series):
     _partition_type = cudf.Series
 
+    @annotate("Series_count", color="green", domain="dask_cudf_python")
     def count(self, split_every=False):
         return reduction(
             [self],
@@ -375,12 +403,14 @@ def count(self, split_every=False):
             meta="i8",
         )
 
+    @annotate("Series_mean", color="green", domain="dask_cudf_python")
     def mean(self, split_every=False):
         sum = self.sum(split_every=split_every)
         n = self.count(split_every=split_every)
         return sum / n
 
     @derived_from(pd.DataFrame)
+    @annotate("Series_var", color="green", domain="dask_cudf_python")
     def var(
         self,
         axis=None,
@@ -409,16 +439,19 @@ def var(
         else:
             return _parallel_var(self, meta, skipna, split_every, out)
 
+    @annotate("Series_groupby", color="green", domain="dask_cudf_python")
     def groupby(self, *args, **kwargs):
         from .groupby import CudfSeriesGroupBy
 
         return CudfSeriesGroupBy(self, *args, **kwargs)
 
     @property
+    @annotate("Series_list", color="green", domain="dask_cudf_python")
     def list(self):
         return ListMethods(self)
 
     @property
+    @annotate("Series_struct", color="green", domain="dask_cudf_python")
     def struct(self):
         return StructMethods(self)
 
@@ -427,6 +460,7 @@ class Index(Series, dd.core.Index):
     _partition_type = cudf.Index  # type: ignore
 
 
+@annotate("_naive_var", color="green", domain="dask_cudf_python")
 def _naive_var(ddf, meta, skipna, ddof, split_every, out):
     num = ddf._get_numeric_data()
     x = 1.0 * num.sum(skipna=skipna, split_every=split_every)
@@ -441,6 +475,7 @@ def _naive_var(ddf, meta, skipna, ddof, split_every, out):
     return handle_out(out, result)
 
 
+@annotate("_parallel_var", color="green", domain="dask_cudf_python")
 def _parallel_var(ddf, meta, skipna, split_every, out):
     def _local_var(x, skipna):
         if skipna:
@@ -507,6 +542,7 @@ def _finalize_var(vals):
     return handle_out(out, result)
 
 
+@annotate("_extract_meta", color="green", domain="dask_cudf_python")
 def _extract_meta(x):
     """
     Extract internal cache data (``_meta``) from dask_cudf objects
@@ -522,6 +558,7 @@ def _extract_meta(x):
     return x
 
 
+@annotate("_emulate", color="green", domain="dask_cudf_python")
 def _emulate(func, *args, **kwargs):
     """
     Apply a function using args / kwargs. If arguments contain dd.DataFrame /
@@ -531,6 +568,7 @@ def _emulate(func, *args, **kwargs):
         return func(*_extract_meta(args), **_extract_meta(kwargs))
 
 
+@annotate("align_partitions", color="green", domain="dask_cudf_python")
 def align_partitions(args):
     """Align partitions between dask_cudf objects.
 
@@ -546,6 +584,7 @@ def align_partitions(args):
     return args
 
 
+@annotate("reduction", color="green", domain="dask_cudf_python")
 def reduction(
     args,
     chunk=None,
@@ -686,6 +725,7 @@ def reduction(
     return dd.core.new_dd_object(graph, b, meta, (None, None))
 
 
+@annotate("from_cudf", color="green", domain="dask_cudf_python")
 def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None):
     if isinstance(getattr(data, "index", None), cudf.MultiIndex):
         raise NotImplementedError(
@@ -707,6 +747,7 @@ def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None):
 )
 
 
+@annotate("from_dask_dataframe", color="green", domain="dask_cudf_python")
 def from_dask_dataframe(df):
     return df.map_partitions(cudf.from_pandas)
 
diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py
index 1bc270a5b9f..658e63ea923 100644
--- a/python/dask_cudf/dask_cudf/groupby.py
+++ b/python/dask_cudf/dask_cudf/groupby.py
@@ -6,6 +6,7 @@
 
 import numpy as np
 import pandas as pd
+from nvtx import annotate
 
 from dask.base import tokenize
 from dask.dataframe.core import (
@@ -35,11 +36,19 @@
 
 
 class CudfDataFrameGroupBy(DataFrameGroupBy):
+    @annotate(
+        "CudfDataFrameGroupBy_INIT", color="green", domain="dask_cudf_python"
+    )
     def __init__(self, *args, **kwargs):
         self.sep = kwargs.pop("sep", "___")
         self.as_index = kwargs.pop("as_index", True)
         super().__init__(*args, **kwargs)
 
+    @annotate(
+        "CudfDataFrameGroupBy_GETITEM",
+        color="green",
+        domain="dask_cudf_python",
+    )
     def __getitem__(self, key):
         if isinstance(key, list):
             g = CudfDataFrameGroupBy(
@@ -53,6 +62,9 @@ def __getitem__(self, key):
         g._meta = g._meta[key]
         return g
 
+    @annotate(
+        "CudfDataFrameGroupBy_MEAN", color="green", domain="dask_cudf_python"
+    )
     def mean(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -66,6 +78,11 @@ def mean(self, split_every=None, split_out=1):
             as_index=self.as_index,
         )
 
+    @annotate(
+        "CudfDataFrameGroupBy_COLLECT",
+        color="green",
+        domain="dask_cudf_python",
+    )
     def collect(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -79,6 +96,11 @@ def collect(self, split_every=None, split_out=1):
             as_index=self.as_index,
         )
 
+    @annotate(
+        "CudfDataFrameGroupBy_AGGREGATE",
+        color="green",
+        domain="dask_cudf_python",
+    )
     def aggregate(self, arg, split_every=None, split_out=1):
         if arg == "size":
             return self.size()
@@ -118,11 +140,17 @@ def aggregate(self, arg, split_every=None, split_out=1):
 
 
 class CudfSeriesGroupBy(SeriesGroupBy):
+    @annotate(
+        "CudfSeriesGroupBy_INIT", color="green", domain="dask_cudf_python"
+    )
     def __init__(self, *args, **kwargs):
         self.sep = kwargs.pop("sep", "___")
         self.as_index = kwargs.pop("as_index", True)
         super().__init__(*args, **kwargs)
 
+    @annotate(
+        "CudfSeriesGroupBy_MEAN", color="green", domain="dask_cudf_python"
+    )
     def mean(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -136,6 +164,9 @@ def mean(self, split_every=None, split_out=1):
             as_index=self.as_index,
         )[self._slice]
 
+    @annotate(
+        "CudfSeriesGroupBy_STD", color="green", domain="dask_cudf_python"
+    )
     def std(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -149,6 +180,9 @@ def std(self, split_every=None, split_out=1):
             as_index=self.as_index,
         )[self._slice]
 
+    @annotate(
+        "CudfSeriesGroupBy_VAR", color="green", domain="dask_cudf_python"
+    )
     def var(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -162,6 +196,9 @@ def var(self, split_every=None, split_out=1):
             as_index=self.as_index,
         )[self._slice]
 
+    @annotate(
+        "CudfSeriesGroupBy_COLLECT", color="green", domain="dask_cudf_python"
+    )
     def collect(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -175,6 +212,9 @@ def collect(self, split_every=None, split_out=1):
             as_index=self.as_index,
         )[self._slice]
 
+    @annotate(
+        "CudfSeriesGroupBy_AGGREGATE", color="green", domain="dask_cudf_python"
+    )
     def aggregate(self, arg, split_every=None, split_out=1):
         if arg == "size":
             return self.size()
@@ -205,6 +245,7 @@ def aggregate(self, arg, split_every=None, split_out=1):
         )
 
 
+@annotate("groupby_agg", color="green", domain="dask_cudf_python")
 def groupby_agg(
     ddf,
     gb_cols,
@@ -371,6 +412,7 @@ def groupby_agg(
     return new_dd_object(graph, gb_agg_name, _meta, divisions)
 
 
+@annotate("_redirect_aggs", color="green", domain="dask_cudf_python")
 def _redirect_aggs(arg):
     """Redirect aggregations to their corresponding name in cuDF"""
     redirects = {
@@ -397,6 +439,7 @@ def _redirect_aggs(arg):
     return redirects.get(arg, arg)
 
 
+@annotate("_is_supported", color="green", domain="dask_cudf_python")
 def _is_supported(arg, supported: set):
     """Check that aggregations in `arg` are a subset of `supported`"""
     if isinstance(arg, (list, dict)):
@@ -422,6 +465,7 @@ def _make_name(*args, sep="_"):
     return sep.join(_args)
 
 
+@annotate("_groupby_partition_agg", color="green", domain="dask_cudf_python")
 def _groupby_partition_agg(
     df, gb_cols, aggs, columns, split_out, dropna, sort, sep
 ):
@@ -479,6 +523,7 @@ def _groupby_partition_agg(
     return output
 
 
+@annotate("_tree_node_agg", color="green", domain="dask_cudf_python")
 def _tree_node_agg(dfs, gb_cols, split_out, dropna, sort, sep):
     """Node in groupby-aggregation reduction tree.
 
@@ -513,6 +558,7 @@ def _tree_node_agg(dfs, gb_cols, split_out, dropna, sort, sep):
     return gb
 
 
+@annotate("_var_agg", color="green", domain="dask_cudf_python")
 def _var_agg(df, col, count_name, sum_name, pow2_sum_name, ddof=1):
     """Calculate variance (given count, sum, and sum-squared columns)."""
 
@@ -534,6 +580,7 @@ def _var_agg(df, col, count_name, sum_name, pow2_sum_name, ddof=1):
     return var
 
 
+@annotate("_finalize_gb_agg", color="green", domain="dask_cudf_python")
 def _finalize_gb_agg(
     gb,
     gb_cols,
diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py
index af40d9ca41b..63755cf9b89 100644
--- a/python/dask_cudf/dask_cudf/sorting.py
+++ b/python/dask_cudf/dask_cudf/sorting.py
@@ -4,6 +4,7 @@
 import cupy
 import numpy as np
 import tlz as toolz
+from nvtx import annotate
 
 from dask.base import tokenize
 from dask.dataframe import methods
@@ -16,12 +17,14 @@
 from cudf.api.types import is_categorical_dtype
 
 
+@annotate("set_index_post", color="green", domain="dask_cudf_python")
 def set_index_post(df, index_name, drop, column_dtype):
     df2 = df.set_index(index_name, drop=drop)
     df2.columns = df2.columns.astype(column_dtype)
     return df2
 
 
+@annotate("_set_partitions_pre", color="green", domain="dask_cudf_python")
 def _set_partitions_pre(s, divisions, ascending=True, na_position="last"):
     if ascending:
         partitions = divisions.searchsorted(s, side="right") - 1
@@ -38,6 +41,7 @@ def _set_partitions_pre(s, divisions, ascending=True, na_position="last"):
     return partitions
 
 
+@annotate("_quantile", color="green", domain="dask_cudf_python")
 def _quantile(a, q):
     n = len(a)
     if not len(a):
@@ -45,6 +49,7 @@ def _quantile(a, q):
     return (a.quantiles(q=q.tolist(), interpolation="nearest"), n)
 
 
+@annotate("merge_quantiles", color="green", domain="dask_cudf_python")
 def merge_quantiles(finalq, qs, vals):
     """Combine several quantile calculations of different data.
     [NOTE: Same logic as dask.array merge_percentiles]
@@ -107,6 +112,7 @@ def _append_counts(val, count):
     return rv.reset_index(drop=True)
 
 
+@annotate("_approximate_quantile", color="green", domain="dask_cudf_python")
 def _approximate_quantile(df, q):
     """Approximate quantiles of DataFrame or Series.
     [NOTE: Same logic as dask.dataframe Series quantile]
@@ -180,6 +186,7 @@ def set_quantile_index(df):
     return df
 
 
+@annotate("quantile_divisions", color="green", domain="cudf_python")
 def quantile_divisions(df, by, npartitions):
     qn = np.linspace(0.0, 1.0, npartitions + 1).tolist()
     divisions = _approximate_quantile(df[by], qn).compute()
@@ -213,6 +220,7 @@ def quantile_divisions(df, by, npartitions):
     return divisions
 
 
+@annotate("sort_values", color="green", domain="cudf_python")
 def sort_values(
     df,
     by,

From ed1519b566c31949b9663b4e52fb08997254a3d6 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 9 Feb 2022 19:50:52 -0600
Subject: [PATCH 6/8] Update backends.py

---
 python/dask_cudf/dask_cudf/backends.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index 9ad4bfb5b82..1b1f3e29ab2 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from collections.abc import Iterator
 

From 6170c5c06b2a743bd7ecce5d223b0fe3e93daba4 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 9 Feb 2022 19:51:07 -0600
Subject: [PATCH 7/8] Update core.py

---
 python/dask_cudf/dask_cudf/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py
index f4764beeb57..d8802f33941 100644
--- a/python/dask_cudf/dask_cudf/core.py
+++ b/python/dask_cudf/dask_cudf/core.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
 import math
 import warnings

From e201d11ee2e98d62c2cf9c6c15ac60b7e76cd98c Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 9 Feb 2022 19:51:25 -0600
Subject: [PATCH 8/8] Update sorting.py

---
 python/dask_cudf/dask_cudf/sorting.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py
index 63755cf9b89..ada738c5a9b 100644
--- a/python/dask_cudf/dask_cudf/sorting.py
+++ b/python/dask_cudf/dask_cudf/sorting.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+
 from collections.abc import Iterator
 
 import cupy