From babead6c86e82d90cf24123b1ac37db6f163e1f0 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 26 Sep 2023 16:49:52 +0100 Subject: [PATCH 1/3] Move nvtx annotation utilities to a separate file This will enable us to use them in the spilling code without circular import issues. --- python/cudf/cudf/utils/nvtx_annotation.py | 30 ++++++++++++++++++++++ python/cudf/cudf/utils/utils.py | 31 +++-------------------- 2 files changed, 34 insertions(+), 27 deletions(-) create mode 100644 python/cudf/cudf/utils/nvtx_annotation.py diff --git a/python/cudf/cudf/utils/nvtx_annotation.py b/python/cudf/cudf/utils/nvtx_annotation.py new file mode 100644 index 00000000000..a4404e51232 --- /dev/null +++ b/python/cudf/cudf/utils/nvtx_annotation.py @@ -0,0 +1,30 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +import hashlib +from functools import partial + +from nvtx import annotate + +_NVTX_COLORS = ["green", "blue", "purple", "rapids"] + + +def _get_color_for_nvtx(name): + m = hashlib.sha256() + m.update(name.encode()) + hash_value = int(m.hexdigest(), 16) + idx = hash_value % len(_NVTX_COLORS) + return _NVTX_COLORS[idx] + + +def _cudf_nvtx_annotate(func, domain="cudf_python"): + """Decorator for applying nvtx annotations to methods in cudf.""" + return annotate( + message=func.__qualname__, + color=_get_color_for_nvtx(func.__qualname__), + domain=domain, + )(func) + + +_dask_cudf_nvtx_annotate = partial( + _cudf_nvtx_annotate, domain="dask_cudf_python" +) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index e2cb3f145a1..d219b075178 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -2,15 +2,13 @@ import decimal import functools -import hashlib import os import traceback import warnings -from functools import partial from typing import FrozenSet, Set, Union import numpy as np -from nvtx import annotate +from nvtx import annotate # noqa: F401 import rmm @@ -18,6 +16,9 @@ import cudf.api.types from cudf.core import column from cudf.core.buffer import as_buffer +from cudf.utils.nvtx_annotation import _NVTX_COLORS # noqa: F401 +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate # noqa: F401 +from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate # noqa: F401 # The size of the mask in bytes mask_dtype = cudf.api.types.dtype(np.int32) @@ -119,8 +120,6 @@ def _array_ufunc(obj, ufunc, method, inputs, kwargs): "__ge__", } -_NVTX_COLORS = ["green", "blue", "purple", "rapids"] - # The test root is set by pytest to support situations where tests are run from # a source tree on a built version of cudf. NO_EXTERNAL_ONLY_APIS = os.getenv("NO_EXTERNAL_ONLY_APIS") @@ -353,28 +352,6 @@ def is_na_like(obj): return obj is None or obj is cudf.NA or obj is cudf.NaT -def _get_color_for_nvtx(name): - m = hashlib.sha256() - m.update(name.encode()) - hash_value = int(m.hexdigest(), 16) - idx = hash_value % len(_NVTX_COLORS) - return _NVTX_COLORS[idx] - - -def _cudf_nvtx_annotate(func, domain="cudf_python"): - """Decorator for applying nvtx annotations to methods in cudf.""" - return annotate( - message=func.__qualname__, - color=_get_color_for_nvtx(func.__qualname__), - domain=domain, - )(func) - - -_dask_cudf_nvtx_annotate = partial( - _cudf_nvtx_annotate, domain="dask_cudf_python" -) - - def _warn_no_dask_cudf(fn): @functools.wraps(fn) def wrapper(self): From 4f83404e943957739290d359658ef3eae456d02e Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 26 Sep 2023 17:02:04 +0100 Subject: [PATCH 2/3] Adapt nvtx imports --- python/cudf/cudf/core/dataframe.py | 7 ++----- python/cudf/cudf/core/frame.py | 7 ++----- python/cudf/cudf/core/groupby/groupby.py | 3 ++- python/cudf/cudf/core/index.py | 8 ++------ python/cudf/cudf/core/indexed_frame.py | 3 ++- python/cudf/cudf/core/multiindex.py | 3 ++- python/cudf/cudf/core/series.py | 2 +- python/cudf/cudf/core/single_column_frame.py | 3 ++- python/cudf/cudf/core/udf/groupby_utils.py | 2 +- python/cudf/cudf/core/udf/utils.py | 3 ++- python/cudf/cudf/io/csv.py | 2 +- python/cudf/cudf/io/parquet.py | 2 +- python/cudf/cudf/io/text.py | 4 ++-- python/cudf/cudf/utils/utils.py | 4 ---- python/dask_cudf/dask_cudf/backends.py | 2 +- python/dask_cudf/dask_cudf/core.py | 2 +- python/dask_cudf/dask_cudf/groupby.py | 2 +- python/dask_cudf/dask_cudf/sorting.py | 2 +- 18 files changed, 26 insertions(+), 35 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8a3dbe77787..e8acae9686a 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -95,11 +95,8 @@ min_scalar_type, numeric_normalize_types, ) -from cudf.utils.utils import ( - GetAttrGetItemMixin, - _cudf_nvtx_annotate, - _external_only_api, -) +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import GetAttrGetItemMixin, _external_only_api _cupy_nan_methods_map = { "min": "nanmin", diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 1e6d177f8ca..7cb78bc8d1f 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -47,11 +47,8 @@ from cudf.utils import ioutils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import find_common_type -from cudf.utils.utils import ( - _array_ufunc, - _cudf_nvtx_annotate, - _warn_no_dask_cudf, -) +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import _array_ufunc, _warn_no_dask_cudf # TODO: It looks like Frame is missing a declaration of `copy`, need to add diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index e1740140b44..3b8f0f3824a 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -29,7 +29,8 @@ from cudf.core.mixins import Reducible, Scannable from cudf.core.multiindex import MultiIndex from cudf.core.udf.groupby_utils import _can_be_jitted, jit_groupby_apply -from cudf.utils.utils import GetAttrGetItemMixin, _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import GetAttrGetItemMixin # The three functions below return the quantiles [25%, 50%, 75%] diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index de8a5948033..5c323bda9ea 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -64,12 +64,8 @@ is_mixed_with_object_dtype, numeric_normalize_types, ) -from cudf.utils.utils import ( - _cudf_nvtx_annotate, - _is_same_name, - _warn_no_dask_cudf, - search_range, -) +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import _is_same_name, _warn_no_dask_cudf, search_range def _lexsorted_equal_range( diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 62e091b29b5..b3d70bc351e 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -69,7 +69,8 @@ ) from cudf.utils import docutils from cudf.utils._numba import _CUDFNumbaConfig -from cudf.utils.utils import _cudf_nvtx_annotate, _warn_no_dask_cudf +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import _warn_no_dask_cudf doc_reset_index_template = """ Reset the index of the {klass}, or a level of it. diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 21380bb841c..87a11478870 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -26,7 +26,8 @@ from cudf.core._compat import PANDAS_GE_150 from cudf.core.frame import Frame from cudf.core.index import BaseIndex, _lexsorted_equal_range, as_index -from cudf.utils.utils import NotIterable, _cudf_nvtx_annotate, _is_same_name +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import NotIterable, _is_same_name def _maybe_indices_to_slice(indices: cp.ndarray) -> Union[slice, cp.ndarray]: diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index a195738af54..00ba722136e 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -90,7 +90,7 @@ is_mixed_with_object_dtype, to_cudf_compatible_scalar, ) -from cudf.utils.utils import _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate def _format_percentile_names(percentiles): diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 6a56ab8f3a5..e30e1c747f5 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -19,7 +19,8 @@ ) from cudf.core.column import ColumnBase, as_column from cudf.core.frame import Frame -from cudf.utils.utils import NotIterable, _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import NotIterable class SingleColumnFrame(Frame, NotIterable): diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py index b18720f5db5..5dbcf455e33 100644 --- a/python/cudf/cudf/core/udf/groupby_utils.py +++ b/python/cudf/cudf/core/udf/groupby_utils.py @@ -28,7 +28,7 @@ _supported_dtypes_from_frame, ) from cudf.utils._numba import _CUDFNumbaConfig -from cudf.utils.utils import _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate def _get_frame_groupby_type(dtype, index_dtype): diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index 35a3f6c1ffd..7b7ac2b3070 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -39,7 +39,8 @@ STRING_TYPES, TIMEDELTA_TYPES, ) -from cudf.utils.utils import _cudf_nvtx_annotate, initfunc +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate +from cudf.utils.utils import initfunc # Maximum size of a string column is 2 GiB _STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get( diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index bacc0641639..764885dd7b6 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -11,7 +11,7 @@ from cudf.api.types import is_scalar from cudf.utils import ioutils from cudf.utils.dtypes import _maybe_convert_to_default_type -from cudf.utils.utils import _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index d8510cf8e95..d84aff66d7b 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -22,7 +22,7 @@ from cudf.api.types import is_list_like from cudf.core.column import build_categorical_column, column_empty, full from cudf.utils import ioutils -from cudf.utils.utils import _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate BYTE_SIZES = { "kb": 1000, diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py index eb2c7fa7ef6..0e19972f6e0 100644 --- a/python/cudf/cudf/io/text.py +++ b/python/cudf/cudf/io/text.py @@ -1,11 +1,11 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. from io import BytesIO, StringIO import cudf from cudf._lib import text as libtext from cudf.utils import ioutils -from cudf.utils.utils import _cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index d219b075178..0ff23bd37c6 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -8,7 +8,6 @@ from typing import FrozenSet, Set, Union import numpy as np -from nvtx import annotate # noqa: F401 import rmm @@ -16,9 +15,6 @@ import cudf.api.types from cudf.core import column from cudf.core.buffer import as_buffer -from cudf.utils.nvtx_annotation import _NVTX_COLORS # noqa: F401 -from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate # noqa: F401 -from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate # noqa: F401 # The size of the mask in bytes mask_dtype = cudf.api.types.dtype(np.int32) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index e3f4f04eb85..b1a8ca01924 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -42,7 +42,7 @@ import cudf from cudf.api.types import is_string_dtype -from cudf.utils.utils import _dask_cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate from .core import DataFrame, Index, Series diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index 5b37e6e825c..17650c9b70d 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -22,7 +22,7 @@ import cudf from cudf import _lib as libcudf -from cudf.utils.utils import _dask_cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate from dask_cudf import sorting from dask_cudf.accessors import ListMethods, StructMethods diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py index f4bbcaf4dd1..b1fdf443a17 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/groupby.py @@ -15,7 +15,7 @@ from dask.utils import funcname import cudf -from cudf.utils.utils import _dask_cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate # aggregations that are dask-cudf optimized OPTIMIZED_AGGS = ( diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py index e841f2d8830..e2c8a548100 100644 --- a/python/dask_cudf/dask_cudf/sorting.py +++ b/python/dask_cudf/dask_cudf/sorting.py @@ -16,7 +16,7 @@ import cudf as gd from cudf.api.types import is_categorical_dtype -from cudf.utils.utils import _dask_cudf_nvtx_annotate +from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate @_dask_cudf_nvtx_annotate From 76ac0f2312e73d18b4f970bf365ed412f8aee01d Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 26 Sep 2023 17:04:07 +0100 Subject: [PATCH 3/3] Add nvtx annotations for spill-induced memcpys This allows us to see when RMM allocations are taking an excessive amount of time due to spilling of cudf-managed buffers. --- python/cudf/cudf/core/buffer/spill_manager.py | 7 ++++++ .../cudf/cudf/core/buffer/spillable_buffer.py | 24 +++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py index f056a0fd592..91f3b2cd544 100644 --- a/python/cudf/cudf/core/buffer/spill_manager.py +++ b/python/cudf/cudf/core/buffer/spill_manager.py @@ -11,14 +11,20 @@ import weakref from collections import defaultdict from dataclasses import dataclass +from functools import partial from typing import Dict, List, Optional, Tuple import rmm.mr from cudf.core.buffer.spillable_buffer import SpillableBuffer from cudf.options import get_option +from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate from cudf.utils.string import format_bytes +_spill_cudf_nvtx_annotate = partial( + _cudf_nvtx_annotate, domain="cudf_python-spill" +) + def get_traceback() -> str: """Pretty print current traceback to a string""" @@ -329,6 +335,7 @@ def buffers( ret = tuple(sorted(ret, key=lambda b: b.last_accessed)) return ret + @_spill_cudf_nvtx_annotate def spill_device_memory(self, nbytes: int) -> int: """Try to spill device memory diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index 84fb2044c62..1856bec1876 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -20,6 +20,7 @@ get_ptr_and_size, host_memory_allocation, ) +from cudf.utils.nvtx_annotation import _get_color_for_nvtx, annotate from cudf.utils.string import format_bytes if TYPE_CHECKING: @@ -291,8 +292,15 @@ def spill(self, target: str = "cpu") -> None: ) if (ptr_type, target) == ("gpu", "cpu"): - host_mem = host_memory_allocation(self.size) - rmm._lib.device_buffer.copy_ptr_to_host(self._ptr, host_mem) + with annotate( + message="SpillDtoH", + color=_get_color_for_nvtx("SpillDtoH"), + domain="cudf_python-spill", + ): + host_mem = host_memory_allocation(self.size) + rmm._lib.device_buffer.copy_ptr_to_host( + self._ptr, host_mem + ) self._ptr_desc["memoryview"] = host_mem self._ptr = 0 self._owner = None @@ -302,9 +310,15 @@ def spill(self, target: str = "cpu") -> None: # trigger a new call to this buffer's `spill()`. # Therefore, it is important that spilling-on-demand doesn't # try to unspill an already locked buffer! - dev_mem = rmm.DeviceBuffer.to_device( - self._ptr_desc.pop("memoryview") - ) + with annotate( + message="SpillHtoD", + color=_get_color_for_nvtx("SpillHtoD"), + domain="cudf_python-spill", + ): + + dev_mem = rmm.DeviceBuffer.to_device( + self._ptr_desc.pop("memoryview") + ) self._ptr = dev_mem.ptr self._owner = dev_mem assert self._size == dev_mem.size