Add python wrapper for system memory resource #1605

Merged: 16 commits (Jul 25, 2024)
Changes from 10 commits
6 changes: 6 additions & 0 deletions python/rmm/rmm/_lib/memory_resource.pxd
@@ -52,6 +52,12 @@ cdef class CudaMemoryResource(DeviceMemoryResource):
cdef class ManagedMemoryResource(DeviceMemoryResource):
pass

cdef class SystemMemoryResource(DeviceMemoryResource):
pass

cdef class SamHeadroomResourceAdaptor(DeviceMemoryResource):
cdef readonly DeviceMemoryResource system_mr

cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
pass

55 changes: 55 additions & 0 deletions python/rmm/rmm/_lib/memory_resource.pyx
@@ -94,6 +94,11 @@ cdef extern from "rmm/mr/device/managed_memory_resource.hpp" \
cdef cppclass managed_memory_resource(device_memory_resource):
managed_memory_resource() except +

cdef extern from "rmm/mr/device/system_memory_resource.hpp" \
namespace "rmm::mr" nogil:
cdef cppclass system_memory_resource(device_memory_resource):
system_memory_resource() except +

cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
namespace "rmm::mr" nogil:

@@ -170,6 +175,13 @@ cdef extern from "rmm/mr/device/limiting_resource_adaptor.hpp" \
size_t get_allocated_bytes() except +
size_t get_allocation_limit() except +

cdef extern from "rmm/mr/device/sam_headroom_resource_adaptor.hpp" \
namespace "rmm::mr" nogil:
cdef cppclass sam_headroom_resource_adaptor[Upstream](device_memory_resource):
sam_headroom_resource_adaptor(
Upstream* upstream_mr,
size_t headroom) except +

cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \
namespace "rmm::mr" nogil:
cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource):
@@ -366,6 +378,49 @@ cdef class ManagedMemoryResource(DeviceMemoryResource):
pass


cdef class SystemMemoryResource(DeviceMemoryResource):
def __cinit__(self):
self.c_obj.reset(
new system_memory_resource()
)

def __init__(self):
"""
Memory resource that uses ``malloc``/``free`` for
allocation/deallocation.
"""
pass


cdef class SamHeadroomResourceAdaptor(DeviceMemoryResource):
@bdice (Contributor), Jul 24, 2024:

An adaptor accepts another resource as an argument (e.g. a logging adaptor adds logging, but passes through to the underlying MR). This is not an adaptor; it is implemented like an entirely new resource.

Adaptors must be composable. This class cannot be specific to the system MR and must accept other MRs if it is to be an adaptor.

Contributor:

Should we redesign this so that SystemMemoryResource has a headroom argument and we don't need the "adaptor" at all? Or rename this so it's not an adaptor?

Contributor Author:

The C++ side has both the system MR and the adaptor, but I guess we don't have to strictly mirror it in Python. I'd be happy to have a single SystemMemoryResource with an argument.

Contributor Author:

Done.
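
As an aside on the composability point raised above, here is a minimal sketch (not part of this diff) of how an existing adaptor wraps an arbitrary upstream resource; the LoggingResourceAdaptor constructor and set_current_device_resource call are assumed to match the public rmm.mr Python API:

import rmm

# An adaptor takes the upstream resource it wraps as a constructor argument
# and forwards allocations to it, adding behavior (here, logging) on top.
upstream = rmm.mr.CudaMemoryResource()
logging_mr = rmm.mr.LoggingResourceAdaptor(upstream, log_file_name="rmm_log.txt")

# Adaptors compose with any upstream resource, which is what makes them adaptors.
rmm.mr.set_current_device_resource(logging_mr)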

def __cinit__(
self,
size_t headroom
):
self.system_mr = SystemMemoryResource()
self.c_obj.reset(
new sam_headroom_resource_adaptor[system_memory_resource](
<system_memory_resource*> self.system_mr.get_mr(),
headroom
)
)

def __init__(
self,
size_t headroom
):
"""
Memory resource that adapts the system memory resource to allocate
memory while reserving a headroom of GPU memory.

Parameters
----------
headroom : size_t
    Size of GPU memory to reserve as headroom
"""
pass


cdef class PoolMemoryResource(UpstreamResourceAdaptor):

def __cinit__(
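A minimal usage sketch of the two resources added in this file (not part of the diff; the 1 MiB headroom value is illustrative and mirrors the tests below):

import rmm

# Route RMM allocations through system (pageable) memory via malloc/free.
rmm.mr.set_current_device_resource(rmm.mr.SystemMemoryResource())

# Or do the same while reserving 1 MiB of GPU memory as headroom.
rmm.mr.set_current_device_resource(
    rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20)
)

# Subsequent allocations use the configured resource.
buf = rmm.DeviceBuffer(size=256)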
4 changes: 4 additions & 0 deletions python/rmm/rmm/mr.py
@@ -24,7 +24,9 @@
ManagedMemoryResource,
PoolMemoryResource,
PrefetchResourceAdaptor,
SamHeadroomResourceAdaptor,
StatisticsResourceAdaptor,
SystemMemoryResource,
TrackingResourceAdaptor,
UpstreamResourceAdaptor,
_flush_logs,
@@ -54,7 +56,9 @@
"ManagedMemoryResource",
"PoolMemoryResource",
"PrefetchResourceAdaptor",
"SamHeadroomResourceAdaptor",
"StatisticsResourceAdaptor",
"SystemMemoryResource",
"TrackingResourceAdaptor",
"FailureCallbackResourceAdaptor",
"UpstreamResourceAdaptor",
2 changes: 1 addition & 1 deletion python/rmm/rmm/rmm.py
@@ -1,4 +1,4 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
52 changes: 50 additions & 2 deletions python/rmm/rmm/tests/test_rmm.py
@@ -38,6 +38,11 @@
_runtime_version >= 11020
)

# True when the device can coherently access pageable (system) memory.
_SYSTEM_MEMORY_SUPPORTED = rmm._cuda.gpu.getDeviceAttribute(
    cudart.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess,
    rmm._cuda.gpu.getDevice(),
)


def array_tester(dtype, nelem, alloc):
# data
@@ -91,6 +96,37 @@ def test_rmm_modes(dtype, nelem, alloc, managed, pool):
array_tester(dtype, nelem, alloc)


@pytest.mark.skipif(
not _SYSTEM_MEMORY_SUPPORTED,
reason="System memory not supported",
)
@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
@pytest.mark.parametrize(
"system, pool, headroom", list(product([False, True], [False, True], [False, True]))
)
def test_rmm_modes_system_memory(dtype, nelem, alloc, system, pool, headroom):
assert rmm.is_initialized()
array_tester(dtype, nelem, alloc)

if system and headroom:
base_mr = rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20)
elif system:
base_mr = rmm.mr.SystemMemoryResource()
else:
base_mr = rmm.mr.CudaMemoryResource()
if pool:
mr = rmm.mr.PoolMemoryResource(base_mr)
else:
mr = base_mr
rmm.mr.set_current_device_resource(mr)

assert rmm.is_initialized()

array_tester(dtype, nelem, alloc)


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
@@ -410,7 +446,13 @@ def test_pool_memory_resource(dtype, nelem, alloc):
[
lambda: rmm.mr.CudaMemoryResource(),
lambda: rmm.mr.ManagedMemoryResource(),
],
]
+ (
[lambda: rmm.mr.SystemMemoryResource(),
lambda: rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20)]
if _SYSTEM_MEMORY_SUPPORTED
else []
),
)
def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream):
mr = rmm.mr.FixedSizeMemoryResource(
@@ -432,7 +474,13 @@ def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream):
lambda: rmm.mr.PoolMemoryResource(
rmm.mr.CudaMemoryResource(), 1 << 20
),
],
]
+ (
[lambda: rmm.mr.SystemMemoryResource(),
lambda: rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20)]
if _SYSTEM_MEMORY_SUPPORTED
else []
),
)
def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr):
upstream = upstream_mr()