Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nvtx annotations to spilling-based data movement #14196

Merged
merged 3 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions python/cudf/cudf/core/buffer/spill_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,20 @@
import weakref
from collections import defaultdict
from dataclasses import dataclass
from functools import partial
from typing import Dict, List, Optional, Tuple

import rmm.mr

from cudf.core.buffer.spillable_buffer import SpillableBuffer
from cudf.options import get_option
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
from cudf.utils.string import format_bytes

_spill_cudf_nvtx_annotate = partial(
_cudf_nvtx_annotate, domain="cudf_python-spill"
)


def get_traceback() -> str:
"""Pretty print current traceback to a string"""
Expand Down Expand Up @@ -329,6 +335,7 @@ def buffers(
ret = tuple(sorted(ret, key=lambda b: b.last_accessed))
return ret

@_spill_cudf_nvtx_annotate
def spill_device_memory(self, nbytes: int) -> int:
"""Try to spill device memory

Expand Down
24 changes: 19 additions & 5 deletions python/cudf/cudf/core/buffer/spillable_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
get_ptr_and_size,
host_memory_allocation,
)
from cudf.utils.nvtx_annotation import _get_color_for_nvtx, annotate
from cudf.utils.string import format_bytes

if TYPE_CHECKING:
Expand Down Expand Up @@ -291,8 +292,15 @@ def spill(self, target: str = "cpu") -> None:
)

if (ptr_type, target) == ("gpu", "cpu"):
host_mem = host_memory_allocation(self.size)
rmm._lib.device_buffer.copy_ptr_to_host(self._ptr, host_mem)
with annotate(
message="SpillDtoH",
color=_get_color_for_nvtx("SpillDtoH"),
domain="cudf_python-spill",
):
host_mem = host_memory_allocation(self.size)
rmm._lib.device_buffer.copy_ptr_to_host(
self._ptr, host_mem
)
self._ptr_desc["memoryview"] = host_mem
self._ptr = 0
self._owner = None
Expand All @@ -302,9 +310,15 @@ def spill(self, target: str = "cpu") -> None:
# trigger a new call to this buffer's `spill()`.
# Therefore, it is important that spilling-on-demand doesn't
# try to unspill an already locked buffer!
dev_mem = rmm.DeviceBuffer.to_device(
self._ptr_desc.pop("memoryview")
)
with annotate(
message="SpillHtoD",
color=_get_color_for_nvtx("SpillHtoD"),
domain="cudf_python-spill",
):

dev_mem = rmm.DeviceBuffer.to_device(
self._ptr_desc.pop("memoryview")
)
self._ptr = dev_mem.ptr
self._owner = dev_mem
assert self._size == dev_mem.size
Expand Down
7 changes: 2 additions & 5 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,8 @@
min_scalar_type,
numeric_normalize_types,
)
from cudf.utils.utils import (
GetAttrGetItemMixin,
_cudf_nvtx_annotate,
_external_only_api,
)
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
from cudf.utils.utils import GetAttrGetItemMixin, _external_only_api

_cupy_nan_methods_map = {
"min": "nanmin",
Expand Down
7 changes: 2 additions & 5 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,8 @@
from cudf.utils import ioutils
from cudf.utils.docutils import copy_docstring
from cudf.utils.dtypes import find_common_type
from cudf.utils.utils import (
_array_ufunc,
_cudf_nvtx_annotate,
_warn_no_dask_cudf,
)
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
from cudf.utils.utils import _array_ufunc, _warn_no_dask_cudf


# TODO: It looks like Frame is missing a declaration of `copy`, need to add
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
from cudf.core.mixins import Reducible, Scannable
from cudf.core.multiindex import MultiIndex
from cudf.core.udf.groupby_utils import _can_be_jitted, jit_groupby_apply
from cudf.utils.utils import GetAttrGetItemMixin, _cudf_nvtx_annotate
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
from cudf.utils.utils import GetAttrGetItemMixin


# The three functions below return the quantiles [25%, 50%, 75%]
Expand Down
8 changes: 2 additions & 6 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,8 @@
is_mixed_with_object_dtype,
numeric_normalize_types,
)
from cudf.utils.utils import (
_cudf_nvtx_annotate,
_is_same_name,
_warn_no_dask_cudf,
search_range,
)
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
from cudf.utils.utils import _is_same_name, _warn_no_dask_cudf, search_range


def _lexsorted_equal_range(
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@
)
from cudf.utils import docutils
from cudf.utils._numba import _CUDFNumbaConfig
from cudf.utils.utils import _cudf_nvtx_annotate, _warn_no_dask_cudf
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
from cudf.utils.utils import _warn_no_dask_cudf

doc_reset_index_template = """
Reset the index of the {klass}, or a level of it.
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
from cudf.core._compat import PANDAS_GE_150
from cudf.core.frame import Frame
from cudf.core.index import BaseIndex, _lexsorted_equal_range, as_index
from cudf.utils.utils import NotIterable, _cudf_nvtx_annotate, _is_same_name
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
from cudf.utils.utils import NotIterable, _is_same_name


def _maybe_indices_to_slice(indices: cp.ndarray) -> Union[slice, cp.ndarray]:
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
is_mixed_with_object_dtype,
to_cudf_compatible_scalar,
)
from cudf.utils.utils import _cudf_nvtx_annotate
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate


def _format_percentile_names(percentiles):
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
)
from cudf.core.column import ColumnBase, as_column
from cudf.core.frame import Frame
from cudf.utils.utils import NotIterable, _cudf_nvtx_annotate
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
from cudf.utils.utils import NotIterable


class SingleColumnFrame(Frame, NotIterable):
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/udf/groupby_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
_supported_dtypes_from_frame,
)
from cudf.utils._numba import _CUDFNumbaConfig
from cudf.utils.utils import _cudf_nvtx_annotate
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate


def _get_frame_groupby_type(dtype, index_dtype):
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/udf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
STRING_TYPES,
TIMEDELTA_TYPES,
)
from cudf.utils.utils import _cudf_nvtx_annotate, initfunc
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
from cudf.utils.utils import initfunc

# Maximum size of a string column is 2 GiB
_STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get(
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from cudf.api.types import is_scalar
from cudf.utils import ioutils
from cudf.utils.dtypes import _maybe_convert_to_default_type
from cudf.utils.utils import _cudf_nvtx_annotate
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate


@_cudf_nvtx_annotate
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from cudf.api.types import is_list_like
from cudf.core.column import build_categorical_column, column_empty, full
from cudf.utils import ioutils
from cudf.utils.utils import _cudf_nvtx_annotate
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate

BYTE_SIZES = {
"kb": 1000,
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/io/text.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Copyright (c) 2018-2022, NVIDIA CORPORATION.
# Copyright (c) 2018-2023, NVIDIA CORPORATION.

from io import BytesIO, StringIO

import cudf
from cudf._lib import text as libtext
from cudf.utils import ioutils
from cudf.utils.utils import _cudf_nvtx_annotate
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate


@_cudf_nvtx_annotate
Expand Down
30 changes: 30 additions & 0 deletions python/cudf/cudf/utils/nvtx_annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2023, NVIDIA CORPORATION.

import hashlib
from functools import partial

from nvtx import annotate

_NVTX_COLORS = ["green", "blue", "purple", "rapids"]


def _get_color_for_nvtx(name):
m = hashlib.sha256()
m.update(name.encode())
hash_value = int(m.hexdigest(), 16)
idx = hash_value % len(_NVTX_COLORS)
return _NVTX_COLORS[idx]


def _cudf_nvtx_annotate(func, domain="cudf_python"):
"""Decorator for applying nvtx annotations to methods in cudf."""
return annotate(
message=func.__qualname__,
color=_get_color_for_nvtx(func.__qualname__),
domain=domain,
)(func)


_dask_cudf_nvtx_annotate = partial(
_cudf_nvtx_annotate, domain="dask_cudf_python"
)
27 changes: 0 additions & 27 deletions python/cudf/cudf/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,12 @@

import decimal
import functools
import hashlib
import os
import traceback
import warnings
from functools import partial
from typing import FrozenSet, Set, Union

import numpy as np
from nvtx import annotate

import rmm

Expand Down Expand Up @@ -119,8 +116,6 @@ def _array_ufunc(obj, ufunc, method, inputs, kwargs):
"__ge__",
}

_NVTX_COLORS = ["green", "blue", "purple", "rapids"]

# The test root is set by pytest to support situations where tests are run from
# a source tree on a built version of cudf.
NO_EXTERNAL_ONLY_APIS = os.getenv("NO_EXTERNAL_ONLY_APIS")
Expand Down Expand Up @@ -353,28 +348,6 @@ def is_na_like(obj):
return obj is None or obj is cudf.NA or obj is cudf.NaT


def _get_color_for_nvtx(name):
m = hashlib.sha256()
m.update(name.encode())
hash_value = int(m.hexdigest(), 16)
idx = hash_value % len(_NVTX_COLORS)
return _NVTX_COLORS[idx]


def _cudf_nvtx_annotate(func, domain="cudf_python"):
"""Decorator for applying nvtx annotations to methods in cudf."""
return annotate(
message=func.__qualname__,
color=_get_color_for_nvtx(func.__qualname__),
domain=domain,
)(func)


_dask_cudf_nvtx_annotate = partial(
_cudf_nvtx_annotate, domain="dask_cudf_python"
)


def _warn_no_dask_cudf(fn):
@functools.wraps(fn)
def wrapper(self):
Expand Down
2 changes: 1 addition & 1 deletion python/dask_cudf/dask_cudf/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

import cudf
from cudf.api.types import is_string_dtype
from cudf.utils.utils import _dask_cudf_nvtx_annotate
from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate

from .core import DataFrame, Index, Series

Expand Down
2 changes: 1 addition & 1 deletion python/dask_cudf/dask_cudf/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

import cudf
from cudf import _lib as libcudf
from cudf.utils.utils import _dask_cudf_nvtx_annotate
from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate

from dask_cudf import sorting
from dask_cudf.accessors import ListMethods, StructMethods
Expand Down
2 changes: 1 addition & 1 deletion python/dask_cudf/dask_cudf/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from dask.utils import funcname

import cudf
from cudf.utils.utils import _dask_cudf_nvtx_annotate
from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate

# aggregations that are dask-cudf optimized
OPTIMIZED_AGGS = (
Expand Down
2 changes: 1 addition & 1 deletion python/dask_cudf/dask_cudf/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import cudf as gd
from cudf.api.types import is_categorical_dtype
from cudf.utils.utils import _dask_cudf_nvtx_annotate
from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate


@_dask_cudf_nvtx_annotate
Expand Down