Add nvtx annotations to spilling-based data movement (#14196)

In nsys traces, it is useful to be able to see when memory allocations are provoking cudf-managed spilling. Do this by adding appropriate nvtx annotations. To enable this, we must move the nvtx decorators to a separate file avoiding circular imports. Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Mark Harris (https://github.com/harrism) - Vyas Ramasubramani (https://github.com/vyasr) - Richard (Rick) Zamora (https://github.com/rjzamora) URL: #14196
rapidsai · Oct 18, 2023 · 7aa7579 · 7aa7579
1 parent 29b2537
commit 7aa7579
Show file tree

Hide file tree

Showing 21 changed files with 82 additions and 63 deletions.
diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py
@@ -11,14 +11,20 @@
 import weakref
 from collections import defaultdict
 from dataclasses import dataclass
+from functools import partial
 from typing import Dict, List, Optional, Tuple
 
 import rmm.mr
 
 from cudf.core.buffer.spillable_buffer import SpillableBuffer
 from cudf.options import get_option
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 from cudf.utils.string import format_bytes
 
+_spill_cudf_nvtx_annotate = partial(
+    _cudf_nvtx_annotate, domain="cudf_python-spill"
+)
+
 
 def get_traceback() -> str:
     """Pretty print current traceback to a string"""
@@ -329,6 +335,7 @@ def buffers(
             ret = tuple(sorted(ret, key=lambda b: b.last_accessed))
         return ret
 
+    @_spill_cudf_nvtx_annotate
     def spill_device_memory(self, nbytes: int) -> int:
         """Try to spill device memory
 

diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py
@@ -20,6 +20,7 @@
     get_ptr_and_size,
     host_memory_allocation,
 )
+from cudf.utils.nvtx_annotation import _get_color_for_nvtx, annotate
 from cudf.utils.string import format_bytes
 
 if TYPE_CHECKING:
@@ -291,8 +292,15 @@ def spill(self, target: str = "cpu") -> None:
                 )
 
             if (ptr_type, target) == ("gpu", "cpu"):
-                host_mem = host_memory_allocation(self.size)
-                rmm._lib.device_buffer.copy_ptr_to_host(self._ptr, host_mem)
+                with annotate(
+                    message="SpillDtoH",
+                    color=_get_color_for_nvtx("SpillDtoH"),
+                    domain="cudf_python-spill",
+                ):
+                    host_mem = host_memory_allocation(self.size)
+                    rmm._lib.device_buffer.copy_ptr_to_host(
+                        self._ptr, host_mem
+                    )
                 self._ptr_desc["memoryview"] = host_mem
                 self._ptr = 0
                 self._owner = None
@@ -302,9 +310,15 @@ def spill(self, target: str = "cpu") -> None:
                 # trigger a new call to this buffer's `spill()`.
                 # Therefore, it is important that spilling-on-demand doesn't
                 # try to unspill an already locked buffer!
-                dev_mem = rmm.DeviceBuffer.to_device(
-                    self._ptr_desc.pop("memoryview")
-                )
+                with annotate(
+                    message="SpillHtoD",
+                    color=_get_color_for_nvtx("SpillHtoD"),
+                    domain="cudf_python-spill",
+                ):
+
+                    dev_mem = rmm.DeviceBuffer.to_device(
+                        self._ptr_desc.pop("memoryview")
+                    )
                 self._ptr = dev_mem.ptr
                 self._owner = dev_mem
                 assert self._size == dev_mem.size

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
@@ -95,11 +95,8 @@
     min_scalar_type,
     numeric_normalize_types,
 )
-from cudf.utils.utils import (
-    GetAttrGetItemMixin,
-    _cudf_nvtx_annotate,
-    _external_only_api,
-)
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.utils import GetAttrGetItemMixin, _external_only_api
 
 _cupy_nan_methods_map = {
     "min": "nanmin",

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
@@ -47,11 +47,8 @@
 from cudf.utils import ioutils
 from cudf.utils.docutils import copy_docstring
 from cudf.utils.dtypes import find_common_type
-from cudf.utils.utils import (
-    _array_ufunc,
-    _cudf_nvtx_annotate,
-    _warn_no_dask_cudf,
-)
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.utils import _array_ufunc, _warn_no_dask_cudf
 
 
 # TODO: It looks like Frame is missing a declaration of `copy`, need to add

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
@@ -29,7 +29,8 @@
 from cudf.core.mixins import Reducible, Scannable
 from cudf.core.multiindex import MultiIndex
 from cudf.core.udf.groupby_utils import _can_be_jitted, jit_groupby_apply
-from cudf.utils.utils import GetAttrGetItemMixin, _cudf_nvtx_annotate
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.utils import GetAttrGetItemMixin
 
 
 # The three functions below return the quantiles [25%, 50%, 75%]

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
@@ -64,12 +64,8 @@
     is_mixed_with_object_dtype,
     numeric_normalize_types,
 )
-from cudf.utils.utils import (
-    _cudf_nvtx_annotate,
-    _is_same_name,
-    _warn_no_dask_cudf,
-    search_range,
-)
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.utils import _is_same_name, _warn_no_dask_cudf, search_range
 
 
 def _lexsorted_equal_range(

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
@@ -69,7 +69,8 @@
 )
 from cudf.utils import docutils
 from cudf.utils._numba import _CUDFNumbaConfig
-from cudf.utils.utils import _cudf_nvtx_annotate, _warn_no_dask_cudf
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.utils import _warn_no_dask_cudf
 
 doc_reset_index_template = """
         Reset the index of the {klass}, or a level of it.

diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
@@ -26,7 +26,8 @@
 from cudf.core._compat import PANDAS_GE_150
 from cudf.core.frame import Frame
 from cudf.core.index import BaseIndex, _lexsorted_equal_range, as_index
-from cudf.utils.utils import NotIterable, _cudf_nvtx_annotate, _is_same_name
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.utils import NotIterable, _is_same_name
 
 
 def _maybe_indices_to_slice(indices: cp.ndarray) -> Union[slice, cp.ndarray]:

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
@@ -90,7 +90,7 @@
     is_mixed_with_object_dtype,
     to_cudf_compatible_scalar,
 )
-from cudf.utils.utils import _cudf_nvtx_annotate
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 
 
 def _format_percentile_names(percentiles):

diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
@@ -19,7 +19,8 @@
 )
 from cudf.core.column import ColumnBase, as_column
 from cudf.core.frame import Frame
-from cudf.utils.utils import NotIterable, _cudf_nvtx_annotate
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.utils import NotIterable
 
 
 class SingleColumnFrame(Frame, NotIterable):

diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py
@@ -28,7 +28,7 @@
     _supported_dtypes_from_frame,
 )
 from cudf.utils._numba import _CUDFNumbaConfig
-from cudf.utils.utils import _cudf_nvtx_annotate
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 
 
 def _get_frame_groupby_type(dtype, index_dtype):

diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
@@ -39,7 +39,8 @@
     STRING_TYPES,
     TIMEDELTA_TYPES,
 )
-from cudf.utils.utils import _cudf_nvtx_annotate, initfunc
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.utils import initfunc
 
 # Maximum size of a string column is 2 GiB
 _STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get(

diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py
@@ -11,7 +11,7 @@
 from cudf.api.types import is_scalar
 from cudf.utils import ioutils
 from cudf.utils.dtypes import _maybe_convert_to_default_type
-from cudf.utils.utils import _cudf_nvtx_annotate
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 
 
 @_cudf_nvtx_annotate

diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py
@@ -22,7 +22,7 @@
 from cudf.api.types import is_list_like
 from cudf.core.column import build_categorical_column, column_empty, full
 from cudf.utils import ioutils
-from cudf.utils.utils import _cudf_nvtx_annotate
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 
 BYTE_SIZES = {
     "kb": 1000,

diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py
@@ -1,11 +1,11 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
 
 from io import BytesIO, StringIO
 
 import cudf
 from cudf._lib import text as libtext
 from cudf.utils import ioutils
-from cudf.utils.utils import _cudf_nvtx_annotate
+from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 
 
 @_cudf_nvtx_annotate

diff --git a/python/cudf/cudf/utils/nvtx_annotation.py b/python/cudf/cudf/utils/nvtx_annotation.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+import hashlib
+from functools import partial
+
+from nvtx import annotate
+
+_NVTX_COLORS = ["green", "blue", "purple", "rapids"]
+
+
+def _get_color_for_nvtx(name):
+    m = hashlib.sha256()
+    m.update(name.encode())
+    hash_value = int(m.hexdigest(), 16)
+    idx = hash_value % len(_NVTX_COLORS)
+    return _NVTX_COLORS[idx]
+
+
+def _cudf_nvtx_annotate(func, domain="cudf_python"):
+    """Decorator for applying nvtx annotations to methods in cudf."""
+    return annotate(
+        message=func.__qualname__,
+        color=_get_color_for_nvtx(func.__qualname__),
+        domain=domain,
+    )(func)
+
+
+_dask_cudf_nvtx_annotate = partial(
+    _cudf_nvtx_annotate, domain="dask_cudf_python"
+)
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
@@ -2,15 +2,12 @@
 
 import decimal
 import functools
-import hashlib
 import os
 import traceback
 import warnings
-from functools import partial
 from typing import FrozenSet, Set, Union
 
 import numpy as np
-from nvtx import annotate
 
 import rmm
 
@@ -119,8 +116,6 @@ def _array_ufunc(obj, ufunc, method, inputs, kwargs):
     "__ge__",
 }
 
-_NVTX_COLORS = ["green", "blue", "purple", "rapids"]
-
 # The test root is set by pytest to support situations where tests are run from
 # a source tree on a built version of cudf.
 NO_EXTERNAL_ONLY_APIS = os.getenv("NO_EXTERNAL_ONLY_APIS")
@@ -353,28 +348,6 @@ def is_na_like(obj):
     return obj is None or obj is cudf.NA or obj is cudf.NaT
 
 
-def _get_color_for_nvtx(name):
-    m = hashlib.sha256()
-    m.update(name.encode())
-    hash_value = int(m.hexdigest(), 16)
-    idx = hash_value % len(_NVTX_COLORS)
-    return _NVTX_COLORS[idx]
-
-
-def _cudf_nvtx_annotate(func, domain="cudf_python"):
-    """Decorator for applying nvtx annotations to methods in cudf."""
-    return annotate(
-        message=func.__qualname__,
-        color=_get_color_for_nvtx(func.__qualname__),
-        domain=domain,
-    )(func)
-
-
-_dask_cudf_nvtx_annotate = partial(
-    _cudf_nvtx_annotate, domain="dask_cudf_python"
-)
-
-
 def _warn_no_dask_cudf(fn):
     @functools.wraps(fn)
     def wrapper(self):

diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
@@ -42,7 +42,7 @@
 
 import cudf
 from cudf.api.types import is_string_dtype
-from cudf.utils.utils import _dask_cudf_nvtx_annotate
+from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate
 
 from .core import DataFrame, Index, Series
 

diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py
@@ -22,7 +22,7 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf.utils.utils import _dask_cudf_nvtx_annotate
+from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate
 
 from dask_cudf import sorting
 from dask_cudf.accessors import ListMethods, StructMethods

diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py
@@ -15,7 +15,7 @@
 from dask.utils import funcname
 
 import cudf
-from cudf.utils.utils import _dask_cudf_nvtx_annotate
+from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate
 
 # aggregations that are dask-cudf optimized
 OPTIMIZED_AGGS = (

diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py
@@ -16,7 +16,7 @@
 
 import cudf as gd
 from cudf.api.types import is_categorical_dtype
-from cudf.utils.utils import _dask_cudf_nvtx_annotate
+from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate
 
 _SHUFFLE_SUPPORT = ("tasks", "p2p")  # "disk" not supported