Skip to content

Commit

Permalink
Add argument to enable RMM allocation tracking in benchmarks (#1145)
Browse files Browse the repository at this point in the history
Tracking RMM allocations will be useful together with dask/distributed#5740, and will help with the analysis of memory fragmentation when comparing the regular pool and the async memory allocator.

Authors:
  - Peter Andreas Entschev (https://github.com/pentschev)

Approvers:
  - Benjamin Zaitlen (https://github.com/quasiben)

URL: #1145
  • Loading branch information
pentschev authored Mar 29, 2023
1 parent 9fef6b7 commit 2079152
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
1 change: 1 addition & 0 deletions dask_cuda/benchmarks/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def run(client: Client, args: Namespace, config: Config):
args.rmm_release_threshold,
args.rmm_log_directory,
args.enable_rmm_statistics,
args.enable_rmm_track_allocations,
)
address_to_index, results, message_data = gather_bench_results(client, args, config)
p2p_bw = peer_to_peer_bandwidths(message_data, address_to_index)
Expand Down
20 changes: 20 additions & 0 deletions dask_cuda/benchmarks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,17 @@ def parse_benchmark_args(description="Generic dask-cuda Benchmark", args_list=[]
"This enables spilling implementations such as JIT-Unspill to provides more "
"information on out-of-memory errors",
)
cluster_args.add_argument(
    "--enable-rmm-track-allocations",
    action="store_true",
    help="When enabled, wraps the memory resource used by each worker with a "
    "``rmm.mr.TrackingResourceAdaptor``, which tracks the amount of memory "
    "allocated. "
    # Space added after "allocated." — the adjacent string literals are
    # concatenated, so without it --help printed "...allocated.NOTE: ...".
    "NOTE: This option enables additional diagnostics to be collected and "
    "reported by the Dask dashboard. However, there is significant overhead "
    "associated with this and it should only be used for debugging and memory "
    "profiling.",
)
cluster_args.add_argument(
"--enable-tcp-over-ucx",
default=None,
Expand Down Expand Up @@ -339,6 +350,7 @@ def get_cluster_options(args):
"CUDA_VISIBLE_DEVICES": args.devs,
"interface": args.interface,
"device_memory_limit": args.device_memory_limit,
"dashboard_address": 18787,
**ucx_options,
}
if args.no_silence_logs:
Expand Down Expand Up @@ -370,6 +382,7 @@ def setup_memory_pool(
release_threshold=None,
log_directory=None,
statistics=False,
rmm_track_allocations=False,
):
import cupy

Expand Down Expand Up @@ -399,6 +412,10 @@ def setup_memory_pool(
rmm.mr.set_current_device_resource(
rmm.mr.StatisticsResourceAdaptor(rmm.mr.get_current_device_resource())
)
    if rmm_track_allocations:
        # Wrap whatever resource is currently active (pool, async, or the
        # statistics adaptor installed just above) so that allocations made
        # through it are tracked; wrapping last keeps the tracker outermost.
        rmm.mr.set_current_device_resource(
            rmm.mr.TrackingResourceAdaptor(rmm.mr.get_current_device_resource())
        )


def setup_memory_pools(
Expand All @@ -411,6 +428,7 @@ def setup_memory_pools(
release_threshold,
log_directory,
statistics,
rmm_track_allocations,
):
if not is_gpu:
return
Expand All @@ -423,6 +441,7 @@ def setup_memory_pools(
release_threshold=release_threshold,
log_directory=log_directory,
statistics=statistics,
rmm_track_allocations=rmm_track_allocations,
)
# Create an RMM pool on the scheduler due to occasional deserialization
# of CUDA objects. May cause issues with InfiniBand otherwise.
Expand All @@ -435,6 +454,7 @@ def setup_memory_pools(
release_threshold=release_threshold,
log_directory=log_directory,
statistics=statistics,
rmm_track_allocations=rmm_track_allocations,
)


Expand Down

0 comments on commit 2079152

Please sign in to comment.